In [1]:
import os
import time
import numpy as np
import pandas as pd
import xgboost as xgb
import matplotlib.pyplot as plt
import seaborn as sns
from xgboost import plot_importance, plot_tree
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import GradientBoostingRegressor, AdaBoostRegressor, RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from xgboost import XGBRegressor 
from catboost import CatBoostRegressor


from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
pd.set_option('display.max_columns', None)


# Chart drawing
import plotly as py
import plotly.io as pio
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

# Mute sklearn warnings
from warnings import simplefilter
simplefilter(action='ignore', category=FutureWarning)
simplefilter(action='ignore', category=DeprecationWarning)

# Show charts when running kernel
#init_notebook_mode(connected=True)

# Change default background color for all visualizations
layout=go.Layout(paper_bgcolor='rgba(0,0,0,0)', plot_bgcolor='rgba(250,250,250,0.8)')
fig = go.Figure(layout=layout)
templated_fig = pio.to_templated(fig)
pio.templates['my_template'] = templated_fig.layout.template
pio.templates.default = 'my_template'

import warnings
warnings.filterwarnings("ignore", category=UserWarning, module="xgboost")
warnings.filterwarnings("ignore")

Functions¶

In [2]:
def evaluate_regression_model(y_true, y_pred):
    """
    Calculate and print evaluation metrics for a regression model.

    Parameters:
    - y_true: Actual values.
    - y_pred: Predicted values.

    Returns:
    - Dictionary containing the evaluation metrics.
    """
    # Calculate evaluation metrics
    mse = mean_squared_error(y_true, y_pred)
    rmse = mean_squared_error(y_true, y_pred, squared=False)
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)

    # Print the results
    print(f'Mean Squared Error (MSE): {np.round(mse,3)}')
    print(f'Root Mean Squared Error (RMSE): {np.round(rmse,3)}')
    print(f'Mean Absolute Error (MAE): {np.round(mae,3)}')
    print(f'R-squared (R2): {np.round(r2,3)}')

    # Return results as a dictionary
    results = {
        'MSE': mse,
        'RMSE': rmse,
        'MAE': mae,
        'R2': r2
    }

    return results
In [3]:
def evaluate_regression_model2(y_true, y_pred):
    """
    Calculate and print evaluation metrics for a regression model.

    Parameters:
    - y_true: Actual values.
    - y_pred: Predicted values.

    Returns:
    - Dictionary containing the evaluation metrics.
    """
    # Calculate evaluation metrics
    mse = mean_squared_error(y_true, y_pred)
    rmse = mean_squared_error(y_true, y_pred, squared=False)
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)

#     # Print the results
#     print(f'Mean Squared Error (MSE): {np.round(mse,3)}')
#     print(f'Root Mean Squared Error (RMSE): {np.round(rmse,3)}')
#     print(f'Mean Absolute Error (MAE): {np.round(mae,3)}')
#     print(f'R-squared (R2): {np.round(r2,3)}')

    # Return results as a dictionary
    results = {
        'MSE': mse,
        'RMSE': rmse,
        'MAE': mae,
        'R2': r2
    }

    return results
In [4]:
# Returns RSI values
def rsi(df, periods = 14):
    close = df['close']
    close_delta = close.diff()

    # Make two series: one for lower closes and one for higher closes
    up = close_delta.clip(lower=0)
    down = -1 * close_delta.clip(upper=0)
    
    ma_up = up.ewm(com = periods - 1, adjust=True, min_periods = periods).mean()
    ma_down = down.ewm(com = periods - 1, adjust=True, min_periods = periods).mean()

    rsi = ma_up / ma_down
    rsi = 100 - (100/(1 + rsi))
    return rsi
In [5]:
def gain(x):
    return ((x > 0) * x).sum()


def loss(x):
    return ((x < 0) * x).sum()


# Calculate money flow index
# Contributed by Github member and chatgpt
def mfi(df, n=14):
    
    high = df['high']
    low = df['low']
    close = df['close']
    volume = df['volume']
    
    typical_price = (high + low + close) / 3
    money_flow = typical_price * volume
    mf_sign = np.where(typical_price > typical_price.shift(1), 1, -1)
    signed_mf = money_flow * mf_sign

    # Calculate gain and loss using vectorized operations
    positive_mf = np.where(signed_mf > 0, signed_mf, 0)
    negative_mf = np.where(signed_mf < 0, -signed_mf, 0)

    mf_avg_gain = pd.Series(positive_mf).rolling(n, min_periods=1).sum()
    mf_avg_loss = pd.Series(negative_mf).rolling(n, min_periods=1).sum()

    return (100 - 100 / (1 + mf_avg_gain / mf_avg_loss)).to_numpy()
In [6]:
def plot_regression_accuracy(y_true, y_pred):
    """
    Create various plots to evaluate the accuracy of a linear regression model.

    Parameters:
    - y_true: Actual values.
    - y_pred: Predicted values.
    """
    # Scatter Plot
    plt.scatter(y_true, y_pred)
    plt.xlabel('Actual Values')
    plt.ylabel('Predicted Values')
    plt.title('Scatter Plot of Actual vs Predicted Values')
    plt.show()

    # Residual Plot
    residuals = y_true - y_pred
    plt.scatter(y_pred, residuals)
    plt.axhline(y=0, color='r', linestyle='--')
    plt.xlabel('Predicted Values')
    plt.ylabel('Residuals')
    plt.title('Residual Plot')
    plt.show()

    # Distribution of Residuals
    sns.histplot(residuals, kde=True)
    plt.xlabel('Residuals')
    plt.ylabel('Frequency')
    plt.title('Distribution of Residuals')
    plt.show()

    # Predicted vs Actual Line
    plt.plot(y_true, y_true, linestyle='--', color='r', label='Perfect Fit')
    plt.scatter(y_true, y_pred)
    plt.xlabel('Actual Values')
    plt.ylabel('Predicted Values')
    plt.title('Predicted vs Actual Values with Perfect Fit Line')
    plt.legend()
    plt.show()
In [7]:
def plot_predictions(df,prediction):
    plot_test_df= df[df.date.dt.year>=2020]
    plot_test_df['prediction'] = prediction

    fig = make_subplots(rows=2, cols=1)
    fig.add_trace(go.Scatter(x=df.date, y=df.close_1d_next,
                             name='Truth',
                             marker_color='LightSkyBlue'), row=1, col=1)

    fig.add_trace(go.Scatter(x=plot_test_df.date,
                             y=plot_test_df.prediction,
                             name='Prediction',
                             marker_color='MediumPurple'), row=1, col=1)
    
    # Add title and Y-axis title for the first subplot
    fig.update_layout(title_text='Train Data and Test Data', title_x=0.5, title_y=0.9)
    fig.update_yaxes(title_text='Prediction', row=1, col=1)

    fig.add_trace(go.Scatter(x=plot_test_df.date,
                             y=y_test,
                             name='Truth',
                             marker_color='LightSkyBlue',
                             showlegend=False), row=2, col=1)

    fig.add_trace(go.Scatter(x=plot_test_df.date,
                             y=prediction,
                             name='Prediction',
                             marker_color='MediumPurple',
                             showlegend=False), row=2, col=1)
    
    fig.update_yaxes(title_text='Prediction', row=2, col=1)

    fig.show()
In [8]:
def plot_feature_importance(model,X_train,top_features):

    # Get feature importance scores (coefficients)
    feature_importance = model.coef_

    # Create a DataFrame to store feature names and importance scores
    feature_importance_df = pd.DataFrame({'Feature': X_train.columns, 'Importance': np.abs(feature_importance)})

    # Sort features by importance
    feature_importance_df = feature_importance_df.sort_values(by='Importance', ascending=False).reset_index(drop=True)

    top_features = top_features
    # Plot feature importance
    plt.figure(figsize=(20, 6))
    plt.barh(range(len(feature_importance_df[:top_features])), feature_importance_df[:top_features]['Importance'], align="center")
    plt.yticks(range(len(feature_importance_df[:top_features])), labels=feature_importance_df[:top_features]['Feature'])
    plt.ylabel("Features")
    plt.xlabel("Coefficient Magnitude")
    plt.title(f"Top {top_features} Feature Importance Values")
    plt.show()
    
    return feature_importance_df

Read Data¶

In [9]:
out_loc  = '/Users/isapocan/Desktop/LSU/data/'
In [10]:
df = pd.read_parquet(out_loc+"stock_1d.parquet")
df.columns = df.columns.str.lower()
df.head()
Out[10]:
date open high low close adj close volume symbol security gics sector gics sub-industry headquarters location date added cik founded
0 2013-01-02 94.190002 94.790001 93.959999 94.779999 67.895119 3206700.0 MMM 3M Industrials Industrial Conglomerates Saint Paul, Minnesota 1957-03-04 66740 1902
1 2013-01-03 94.339996 94.930000 94.129997 94.669998 67.816322 2704600.0 MMM 3M Industrials Industrial Conglomerates Saint Paul, Minnesota 1957-03-04 66740 1902
2 2013-01-04 94.790001 95.480003 94.540001 95.370003 68.317757 2704900.0 MMM 3M Industrials Industrial Conglomerates Saint Paul, Minnesota 1957-03-04 66740 1902
3 2013-01-07 95.019997 95.730003 94.760002 95.489998 68.403717 2745800.0 MMM 3M Industrials Industrial Conglomerates Saint Paul, Minnesota 1957-03-04 66740 1902
4 2013-01-08 95.169998 95.750000 95.099998 95.500000 68.410889 2655500.0 MMM 3M Industrials Industrial Conglomerates Saint Paul, Minnesota 1957-03-04 66740 1902

Select Stock¶

In [11]:
df = df[df['symbol']=='MDLZ']

1. Feature engineering¶

1.a. Financial indicators¶

In [12]:
df['ema_9'] = df['close'].ewm(9).mean().shift()
df['sma_5'] = df['close'].rolling(5).mean().shift()
df['sma_10'] = df['close'].rolling(10).mean().shift()
df['sma_15'] = df['close'].rolling(15).mean().shift()
df['sma_30'] = df['close'].rolling(30).mean().shift()
In [13]:
df.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 2733 entries, 852843 to 855575
Data columns (total 20 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   date                   2733 non-null   datetime64[ns]
 1   open                   2733 non-null   float64       
 2   high                   2733 non-null   float64       
 3   low                    2733 non-null   float64       
 4   close                  2733 non-null   float64       
 5   adj close              2733 non-null   float64       
 6   volume                 2733 non-null   float64       
 7   symbol                 2733 non-null   object        
 8   security               2733 non-null   object        
 9   gics sector            2733 non-null   object        
 10  gics sub-industry      2733 non-null   object        
 11  headquarters location  2733 non-null   object        
 12  date added             2733 non-null   object        
 13  cik                    2733 non-null   int64         
 14  founded                2733 non-null   object        
 15  ema_9                  2732 non-null   float64       
 16  sma_5                  2728 non-null   float64       
 17  sma_10                 2723 non-null   float64       
 18  sma_15                 2718 non-null   float64       
 19  sma_30                 2703 non-null   float64       
dtypes: datetime64[ns](1), float64(11), int64(1), object(7)
memory usage: 448.4+ KB
In [14]:
df['rsi'] = rsi(df) #.fillna(0)
df['mfi'] = mfi(df, 14)
In [15]:
df[['date','close','ema_9','sma_5','sma_10','sma_15','sma_30','rsi','mfi']]
Out[15]:
date close ema_9 sma_5 sma_10 sma_15 sma_30 rsi mfi
852843 2013-01-02 26.670000 NaN NaN NaN NaN NaN NaN 0.000000
852844 2013-01-03 26.639999 26.670000 NaN NaN NaN NaN NaN 33.904295
852845 2013-01-04 26.740000 26.654210 NaN NaN NaN NaN NaN 48.695375
852846 2013-01-07 26.660000 26.685867 NaN NaN NaN NaN NaN 39.919745
852847 2013-01-08 26.680000 26.678345 NaN NaN NaN NaN NaN 55.233142
... ... ... ... ... ... ... ... ... ...
855571 2023-11-02 67.970001 65.538988 65.934001 65.321001 64.406001 65.993000 60.257764 89.207420
855572 2023-11-03 68.820000 65.782090 66.398001 65.697001 64.868001 65.901667 63.726091 89.458580
855573 2023-11-06 68.239998 66.085881 67.160001 66.169001 65.354001 65.848000 59.885606 83.710782
855574 2023-11-07 68.489998 66.301292 67.612000 66.594001 65.728667 65.799000 60.977252 75.937617
855575 2023-11-08 69.019997 66.520163 68.067999 66.888000 66.058667 65.729667 63.259914 75.566164

2733 rows × 9 columns

In [16]:
df[['rsi','mfi']].corr()
Out[16]:
rsi mfi
rsi 1.000000 0.698958
mfi 0.698958 1.000000
In [ ]:
 
In [17]:
EMA_12 = pd.Series(df['close'].ewm(span=12, min_periods=12).mean())
EMA_26 = pd.Series(df['close'].ewm(span=26, min_periods=26).mean())
df['macd'] = pd.Series(EMA_12 - EMA_26)
df['macd_signal'] = pd.Series(df.macd.ewm(span=9, min_periods=9).mean())
In [18]:
df[(~df['macd'].isna()) & (~df['macd_signal'].isna())][['macd','macd_signal']].head()
Out[18]:
macd macd_signal
852876 -0.147786 -0.050945
852877 -0.175230 -0.078792
852878 -0.198438 -0.104970
852879 -0.235462 -0.132994
852880 -0.226841 -0.152855

1.b. Shift Close price in order to predict next day¶

In [19]:
### predict next day
df['close_1d_next'] = df['close'].shift(-1)
df[['date','close','close_1d_next']].head()
Out[19]:
date close close_1d_next
852843 2013-01-02 26.670000 26.639999
852844 2013-01-03 26.639999 26.740000
852845 2013-01-04 26.740000 26.660000
852846 2013-01-07 26.660000 26.680000
852847 2013-01-08 26.680000 27.049999
In [ ]:
 

1.c. Add lag features¶

In [20]:
df['close_1d_ago'] = df['close'].shift(1)
df['close_3d_ago'] = df['close'].shift(3)
df['close_5d_ago'] = df['close'].shift(5)
df['close_1w_ago'] = df['close'].shift(7)
df['close_2w_ago'] = df['close'].shift(14)
df['close_3w_ago'] = df['close'].shift(21)
df['close_4w_ago'] = df['close'].shift(28)

df['adj_close_1d_ago'] = df['adj close'].shift(1)
df['adj_close_3d_ago'] = df['adj close'].shift(3)
df['adj_close_5d_ago'] = df['adj close'].shift(5)
df['adj_close_1w_ago'] = df['adj close'].shift(7)
df['adj_close_2w_ago'] = df['adj close'].shift(14)
df['adj_close_3w_ago'] = df['adj close'].shift(21)
df['adj_close_4w_ago'] = df['adj close'].shift(28)

df['open_1d_ago'] = df['open'].shift(1)
df['open_3d_ago'] = df['open'].shift(3)
df['open_5d_ago'] = df['open'].shift(5)
df['open_1w_ago'] = df['open'].shift(7)
df['open_2w_ago'] = df['open'].shift(14)
df['open_3w_ago'] = df['open'].shift(21)
df['open_4w_ago'] = df['open'].shift(28)

df['high_1d_ago'] = df['high'].shift(1)
df['high_3d_ago'] = df['high'].shift(3)
df['high_5d_ago'] = df['high'].shift(5)
df['high_1w_ago'] = df['high'].shift(7)
df['high_2w_ago'] = df['high'].shift(14)
df['high_3w_ago'] = df['high'].shift(21)
df['high_4w_ago'] = df['high'].shift(28)

df['low_1d_ago'] = df['low'].shift(1)
df['low_3d_ago'] = df['low'].shift(3)
df['low_5d_ago'] = df['low'].shift(5)
df['low_1w_ago'] = df['low'].shift(7)
df['low_2w_ago'] = df['low'].shift(14)
df['low_3w_ago'] = df['low'].shift(21)
df['low_4w_ago'] = df['low'].shift(28)

df['volume_1d_ago'] = df['volume'].shift(1)
df['volume_3d_ago'] = df['volume'].shift(3)
df['volume_5d_ago'] = df['volume'].shift(5)
df['volume_1w_ago'] = df['volume'].shift(7)
df['volume_2w_ago'] = df['volume'].shift(14)
df['volume_3w_ago'] = df['volume'].shift(21)
df['volume_4w_ago'] = df['volume'].shift(28)

df['open_3d_avg'] = df['open'].rolling(window=3).mean()
df['open_5d_avg'] = df['open'].rolling(window=5).mean()
df['open_7d_avg'] = df['open'].rolling(window=7).mean()
df['open_10d_avg'] = df['open'].rolling(window=10).mean()
df['open_15d_avg'] = df['open'].rolling(window=15).mean()
df['open_30d_avg'] = df['open'].rolling(window=30).mean()

df['high_3d_avg'] = df['high'].rolling(window=3).mean()
df['high_5d_avg'] = df['high'].rolling(window=5).mean()
df['high_7d_avg'] = df['high'].rolling(window=7).mean()
df['high_10d_avg'] = df['high'].rolling(window=10).mean()
df['high_15d_avg'] = df['high'].rolling(window=15).mean()
df['high_30d_avg'] = df['high'].rolling(window=30).mean()

df['low_3d_avg'] = df['low'].rolling(window=3).mean()
df['low_5d_avg'] = df['low'].rolling(window=5).mean()
df['low_7d_avg'] = df['low'].rolling(window=7).mean()
df['low_10d_avg'] = df['low'].rolling(window=10).mean()
df['low_15d_avg'] = df['low'].rolling(window=15).mean()
df['low_30d_avg'] = df['low'].rolling(window=30).mean()

df['volume_3d_avg'] = df['volume'].rolling(window=3).mean()
df['volume_5d_avg'] = df['volume'].rolling(window=5).mean()
df['volume_7d_avg'] = df['volume'].rolling(window=7).mean()
df['volume_10d_avg'] = df['volume'].rolling(window=10).mean()
df['volume_15d_avg'] = df['volume'].rolling(window=15).mean()
df['volume_30d_avg'] = df['volume'].rolling(window=30).mean()

df['adj_close_3d_avg'] = df['adj close'].rolling(window=3).mean()
df['adj_close_5d_avg'] = df['adj close'].rolling(window=5).mean()
df['adj_close_7d_avg'] = df['adj close'].rolling(window=7).mean()
df['adj_close_10d_avg'] = df['adj close'].rolling(window=10).mean()
df['adj_close_15d_avg'] = df['adj close'].rolling(window=15).mean()
df['adj_close_30d_avg'] = df['adj close'].rolling(window=30).mean()
In [ ]:
 

1.d. Remove missing records after feature engineering¶

In [21]:
df = df.dropna().reset_index(drop=True)
df.head()
Out[21]:
date open high low close adj close volume symbol security gics sector gics sub-industry headquarters location date added cik founded ema_9 sma_5 sma_10 sma_15 sma_30 rsi mfi macd macd_signal close_1d_next close_1d_ago close_3d_ago close_5d_ago close_1w_ago close_2w_ago close_3w_ago close_4w_ago adj_close_1d_ago adj_close_3d_ago adj_close_5d_ago adj_close_1w_ago adj_close_2w_ago adj_close_3w_ago adj_close_4w_ago open_1d_ago open_3d_ago open_5d_ago open_1w_ago open_2w_ago open_3w_ago open_4w_ago high_1d_ago high_3d_ago high_5d_ago high_1w_ago high_2w_ago high_3w_ago high_4w_ago low_1d_ago low_3d_ago low_5d_ago low_1w_ago low_2w_ago low_3w_ago low_4w_ago volume_1d_ago volume_3d_ago volume_5d_ago volume_1w_ago volume_2w_ago volume_3w_ago volume_4w_ago open_3d_avg open_5d_avg open_7d_avg open_10d_avg open_15d_avg open_30d_avg high_3d_avg high_5d_avg high_7d_avg high_10d_avg high_15d_avg high_30d_avg low_3d_avg low_5d_avg low_7d_avg low_10d_avg low_15d_avg low_30d_avg volume_3d_avg volume_5d_avg volume_7d_avg volume_10d_avg volume_15d_avg volume_30d_avg adj_close_3d_avg adj_close_5d_avg adj_close_7d_avg adj_close_10d_avg adj_close_15d_avg adj_close_30d_avg
0 2013-02-20 27.070000 27.150000 26.950001 27.030001 21.735399 17057200.0 MDLZ Mondelez International Consumer Staples Packaged Foods & Meats Chicago, Illinois 2012-10-02 1103982 2012 27.499536 27.136 27.518 27.642000 27.589000 41.633625 53.176274 -0.147786 -0.050945 26.820000 26.959999 26.570000 27.680000 27.76 27.730000 28.080000 27.049999 21.679117 21.365499 22.258080 22.322405 22.298285 22.579723 21.751484 26.750000 26.690001 27.700001 27.799999 27.830000 27.969999 26.790001 27.190001 27.020000 27.830000 28.100000 27.980000 28.100000 27.080000 26.750000 26.450001 27.270000 27.750000 27.67 27.820000 26.68 18297500.0 37728900.0 14931000.0 11159200.0 5800400.0 15906900.0 11671400.0 26.886667 27.018 27.217143 27.386 27.553333 27.536667 27.136667 27.248000 27.410000 27.618 27.779333 27.754667 26.766667 26.842 27.015714 27.224 27.411333 27.382333 1.904973e+07 21756140.0 1.907480e+07 17005580.0 1.419575e+07 1.352419e+07 21.633545 21.716101 21.878994 22.053831 22.184635 22.194819
1 2013-02-21 26.990000 27.049999 26.639999 26.820000 21.566534 16936600.0 MDLZ Mondelez International Consumer Staples Packaged Foods & Meats Chicago, Illinois 2012-10-02 1103982 2012 27.451239 27.006 27.426 27.588667 27.601333 38.257648 47.431888 -0.175230 -0.078792 26.770000 27.030001 26.719999 27.750000 27.75 27.790001 27.559999 27.309999 21.735399 21.486118 22.314371 22.314371 22.346525 22.161583 21.960548 27.070000 26.840000 27.740000 27.730000 27.650000 27.730000 27.129999 27.150000 27.070000 27.809999 27.799999 27.950001 28.040001 27.340000 26.950001 26.600000 27.459999 27.629999 27.65 27.299999 27.09 17057200.0 21794500.0 13902600.0 9811900.0 7541300.0 18213200.0 16348500.0 26.936666 26.868 27.111429 27.295 27.497333 27.552333 27.130000 27.096000 27.302857 27.510 27.717333 27.759000 26.780000 26.678 26.874286 27.108 27.342667 27.388333 1.743043e+07 22362940.0 2.009261e+07 17608410.0 1.493817e+07 1.361005e+07 21.660350 21.566534 21.772160 21.958945 22.135851 22.198572
2 2013-02-22 26.889999 27.129999 26.730000 26.770000 21.526327 16664800.0 MDLZ Mondelez International Consumer Staples Packaged Foods & Meats Chicago, Illinois 2012-10-02 1103982 2012 27.386494 26.820 27.308 27.528000 27.606000 37.478423 48.958416 -0.198438 -0.104970 26.490000 26.820000 26.959999 26.570000 27.68 28.219999 27.790001 27.420000 21.566534 21.679117 21.365499 22.258080 22.692308 22.346525 22.049007 26.990000 26.750000 26.690001 27.700001 28.000000 27.500000 27.350000 27.049999 27.190001 27.020000 27.830000 28.320000 27.889999 27.540001 26.639999 26.750000 26.450001 27.270000 27.93 27.350000 27.25 16936600.0 18297500.0 37728900.0 14931000.0 9623100.0 15212300.0 10162600.0 26.983333 26.908 26.995714 27.220 27.446667 27.555667 27.109999 27.118000 27.202857 27.415 27.662666 27.760667 26.773333 26.734 26.797143 27.023 27.281333 27.390000 1.688620e+07 18150120.0 2.034030e+07 17828420.0 1.554640e+07 1.377650e+07 21.609420 21.598699 21.667624 21.856822 22.081171 22.191067
3 2013-02-25 26.790001 27.080000 26.480000 26.490000 21.301172 15527100.0 MDLZ Mondelez International Consumer Staples Packaged Foods & Meats Chicago, Illinois 2012-10-02 1103982 2012 27.323424 26.860 27.181 27.460000 27.596667 33.378362 47.675126 -0.235462 -0.132994 26.950001 26.770000 27.030001 26.719999 27.75 27.879999 27.830000 27.480000 21.526327 21.735399 21.486118 22.314371 22.418896 22.378695 22.097254 26.889999 27.070000 26.840000 27.740000 28.010000 27.930000 27.459999 27.129999 27.150000 27.070000 27.809999 28.150000 28.030001 27.520000 26.730000 26.950001 26.600000 27.459999 27.83 27.639999 27.17 16664800.0 17057200.0 21794500.0 13902600.0 8954300.0 14444500.0 8688200.0 26.890000 26.898 26.860000 27.119 27.366000 27.544333 27.086666 27.120000 27.098571 27.313 27.580000 27.752000 26.616666 26.710 26.657143 26.896 27.184667 27.369667 1.637617e+07 16896640.0 2.057237e+07 18265210.0 1.594000e+07 1.374912e+07 21.464678 21.561710 21.522881 21.754699 21.988429 22.169087
4 2013-02-26 26.530001 26.980000 26.510000 26.950001 21.671074 13702900.0 MDLZ Mondelez International Consumer Staples Packaged Foods & Meats Chicago, Illinois 2012-10-02 1103982 2012 27.238357 26.814 27.054 27.344667 27.569333 44.181951 48.178912 -0.226841 -0.152855 27.570000 26.490000 26.820000 26.959999 26.57 27.950001 27.780001 27.709999 21.301172 21.566534 21.679117 21.365499 22.475189 22.338484 22.282200 26.790001 26.990000 26.750000 26.690001 27.950001 27.830000 27.580000 27.080000 27.049999 27.190001 27.020000 28.110001 27.889999 27.740000 26.480000 26.639999 26.750000 26.450001 27.85 27.690001 27.34 15527100.0 16936600.0 18297500.0 37728900.0 10961400.0 12066800.0 9863200.0 26.736667 26.854 26.837143 26.999 27.267333 27.517000 27.063333 27.077999 27.092857 27.231 27.502000 27.733333 26.573333 26.662 26.665714 26.784 27.096667 27.345000 1.529827e+07 15977720.0 1.714009e+07 18654310.0 1.625657e+07 1.386713e+07 21.499524 21.560101 21.566535 21.690369 21.938574 22.156490
In [ ]:
 

1.e. Split Data into Train ( before Covid ) and Test ( after Covid )¶

In [22]:
# # Calculate the index for the 70-30 split
# split_index = int(0.7 * len(df))

# # Split the DataFrame into training and testing sets
# train_df = df.iloc[:split_index]
# test_df = df.iloc[split_index:]

# Split the DataFrame into training and testing sets
train_df = df[df.date.dt.year<2020]
test_df = df[df.date.dt.year>=2020]



print(f"Train days: {len(train_df)}, Test days: {len(test_df)}")

fig = go.Figure()
fig.add_trace(go.Scatter(x=train_df.date, y=train_df.close_1d_next, name='Training'))
fig.add_trace(go.Scatter(x=test_df.date,  y=test_df.close_1d_next,  name='Test'))
fig.show()
Train days: 1729, Test days: 970
In [23]:
drop_cols1 = ['date','open','high','low','close','adj close','volume','symbol','security',
 'gics sector','gics sub-industry','headquarters location','date added','cik','founded']

train_df = train_df.drop(drop_cols1, 1)

test_df  = test_df.drop(drop_cols1, 1)

# target column is next day's close price
y_train = train_df['close_1d_next'].copy()
X_train = train_df.drop(['close_1d_next'], 1)

# target column is next day's close price
y_test  = test_df['close_1d_next'].copy()
X_test  = test_df.drop(['close_1d_next'], 1)
In [24]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

2. Train multiple regression models¶

In [25]:
def train_and_evaluate_models(X_train_scaled,y_train,X_test_scaled,y_test):
    """
    Train and evaluate multiple regression models on a given dataframe.

    Parameters:
    - dataframe: Pandas DataFrame containing the dataset.
    - target_column: Name of the target column (dependent variable).
    - features_columns: List of column names used as features (independent variables).

    Returns:
    - A DataFrame containing evaluation metrics for each model.
    """

    # Split the data into features (X) and target variable (y)
#     X = dataframe[features_columns]
#     y = dataframe[target_column]

    # Split the data into training and testing sets (70-30 split)
    #X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

    
#     scaler = StandardScaler()
#     X_train_scaled = scaler.fit_transform(X_train)
#     X_test_scaled = scaler.transform(X_test)


    # Initialize the models
    models = {
        'Linear Regression': LinearRegression(),
        'Ridge Regression': Ridge(),
        'Lasso Regression': Lasso(),
        'Elastic Net': ElasticNet(),
        'SVR': SVR(),
        'K-Neighbors Regressor': KNeighborsRegressor(),
        'Decision Tree': DecisionTreeRegressor(),
        'Random Forest': RandomForestRegressor(),
        'Gradient Boosting': GradientBoostingRegressor(),
        'AdaBoost': AdaBoostRegressor(),
        'XGBoost': XGBRegressor(),
        'CatBoost': CatBoostRegressor()
    }

    # Initialize a DataFrame to store the evaluation metrics
    metrics_df = pd.DataFrame(columns=['Model', 'Mean Squared Error', 'Mean Absolute Error', 'R2 Score'])

    # Train and evaluate each model
    for model_name, model in models.items():
        start_time = time.time() 
       
    # Train the model
        model.fit(X_train_scaled, y_train)
        
        end_time = time.time()  # Record the end time

        training_time = end_time - start_time

        # Make predictions
        y_pred = model.predict(X_test_scaled)

        # Evaluate the model
        mse = mean_squared_error(y_test, y_pred)
        mae = mean_absolute_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)

        # Store the metrics in the DataFrame
        metrics_df = metrics_df.append({
            'Model': model_name,
            'Mean Squared Error': mse,
            'Mean Absolute Error': mae,
            'R2 Score': r2,
            'Training Time (s)': training_time
        }, ignore_index=True)
        metrics_df = metrics_df.sort_values(by=['R2 Score'],ascending=False)
    return metrics_df
In [26]:
df_compare = train_and_evaluate_models(X_train,y_train,X_test,y_test)
Learning rate set to 0.044643
0:	learn: 6.0670144	total: 62.4ms	remaining: 1m 2s
1:	learn: 5.8228277	total: 65.9ms	remaining: 32.9s
2:	learn: 5.5825438	total: 69.8ms	remaining: 23.2s
3:	learn: 5.3589845	total: 73.3ms	remaining: 18.3s
4:	learn: 5.1400669	total: 76.9ms	remaining: 15.3s
5:	learn: 4.9338718	total: 80.6ms	remaining: 13.4s
6:	learn: 4.7348036	total: 84.5ms	remaining: 12s
7:	learn: 4.5462470	total: 87.8ms	remaining: 10.9s
8:	learn: 4.3663666	total: 91.5ms	remaining: 10.1s
9:	learn: 4.1940087	total: 94.9ms	remaining: 9.39s
10:	learn: 4.0326464	total: 98.6ms	remaining: 8.87s
11:	learn: 3.8728104	total: 102ms	remaining: 8.4s
12:	learn: 3.7241421	total: 106ms	remaining: 8.01s
13:	learn: 3.5819820	total: 109ms	remaining: 7.69s
14:	learn: 3.4443416	total: 113ms	remaining: 7.41s
15:	learn: 3.3131010	total: 116ms	remaining: 7.15s
16:	learn: 3.1862951	total: 120ms	remaining: 6.93s
17:	learn: 3.0636007	total: 123ms	remaining: 6.72s
18:	learn: 2.9525545	total: 127ms	remaining: 6.55s
19:	learn: 2.8449392	total: 130ms	remaining: 6.39s
20:	learn: 2.7377198	total: 134ms	remaining: 6.24s
21:	learn: 2.6359381	total: 137ms	remaining: 6.1s
22:	learn: 2.5393906	total: 141ms	remaining: 5.97s
23:	learn: 2.4447919	total: 144ms	remaining: 5.85s
24:	learn: 2.3592925	total: 148ms	remaining: 5.76s
25:	learn: 2.2778166	total: 151ms	remaining: 5.67s
26:	learn: 2.1955454	total: 155ms	remaining: 5.58s
27:	learn: 2.1181936	total: 158ms	remaining: 5.5s
28:	learn: 2.0456841	total: 162ms	remaining: 5.41s
29:	learn: 1.9773187	total: 165ms	remaining: 5.35s
30:	learn: 1.9140404	total: 169ms	remaining: 5.27s
31:	learn: 1.8503968	total: 172ms	remaining: 5.2s
32:	learn: 1.7872718	total: 175ms	remaining: 5.13s
33:	learn: 1.7298101	total: 178ms	remaining: 5.07s
34:	learn: 1.6773427	total: 182ms	remaining: 5.01s
35:	learn: 1.6263746	total: 185ms	remaining: 4.95s
36:	learn: 1.5799050	total: 188ms	remaining: 4.89s
37:	learn: 1.5350898	total: 191ms	remaining: 4.84s
38:	learn: 1.4896754	total: 195ms	remaining: 4.8s
39:	learn: 1.4440908	total: 199ms	remaining: 4.76s
40:	learn: 1.4038854	total: 202ms	remaining: 4.72s
41:	learn: 1.3654869	total: 206ms	remaining: 4.69s
42:	learn: 1.3304415	total: 210ms	remaining: 4.67s
43:	learn: 1.2940654	total: 214ms	remaining: 4.65s
44:	learn: 1.2608561	total: 217ms	remaining: 4.61s
45:	learn: 1.2267358	total: 220ms	remaining: 4.57s
46:	learn: 1.1934982	total: 223ms	remaining: 4.53s
47:	learn: 1.1672852	total: 227ms	remaining: 4.5s
48:	learn: 1.1375280	total: 230ms	remaining: 4.47s
49:	learn: 1.1138729	total: 233ms	remaining: 4.43s
50:	learn: 1.0899582	total: 236ms	remaining: 4.4s
51:	learn: 1.0649512	total: 239ms	remaining: 4.37s
52:	learn: 1.0415461	total: 243ms	remaining: 4.34s
53:	learn: 1.0206758	total: 246ms	remaining: 4.31s
54:	learn: 0.9991569	total: 249ms	remaining: 4.28s
55:	learn: 0.9795833	total: 253ms	remaining: 4.26s
56:	learn: 0.9603286	total: 256ms	remaining: 4.23s
57:	learn: 0.9430651	total: 259ms	remaining: 4.21s
58:	learn: 0.9257478	total: 263ms	remaining: 4.19s
59:	learn: 0.9087157	total: 266ms	remaining: 4.16s
60:	learn: 0.8934800	total: 269ms	remaining: 4.14s
61:	learn: 0.8810773	total: 272ms	remaining: 4.11s
62:	learn: 0.8666743	total: 275ms	remaining: 4.09s
63:	learn: 0.8534041	total: 278ms	remaining: 4.07s
64:	learn: 0.8414198	total: 282ms	remaining: 4.05s
65:	learn: 0.8286451	total: 285ms	remaining: 4.03s
66:	learn: 0.8182783	total: 288ms	remaining: 4.01s
67:	learn: 0.8072075	total: 291ms	remaining: 3.99s
68:	learn: 0.7983925	total: 294ms	remaining: 3.97s
69:	learn: 0.7894196	total: 297ms	remaining: 3.95s
70:	learn: 0.7802497	total: 301ms	remaining: 3.93s
71:	learn: 0.7722262	total: 304ms	remaining: 3.92s
72:	learn: 0.7636665	total: 307ms	remaining: 3.9s
73:	learn: 0.7559532	total: 310ms	remaining: 3.88s
74:	learn: 0.7484620	total: 314ms	remaining: 3.87s
75:	learn: 0.7409614	total: 317ms	remaining: 3.85s
76:	learn: 0.7340694	total: 320ms	remaining: 3.83s
77:	learn: 0.7272183	total: 323ms	remaining: 3.82s
78:	learn: 0.7206823	total: 327ms	remaining: 3.81s
79:	learn: 0.7145614	total: 330ms	remaining: 3.79s
80:	learn: 0.7090921	total: 333ms	remaining: 3.78s
81:	learn: 0.7039718	total: 336ms	remaining: 3.76s
82:	learn: 0.6989219	total: 339ms	remaining: 3.75s
83:	learn: 0.6946642	total: 342ms	remaining: 3.73s
84:	learn: 0.6902238	total: 345ms	remaining: 3.72s
85:	learn: 0.6850230	total: 348ms	remaining: 3.7s
86:	learn: 0.6809441	total: 352ms	remaining: 3.69s
87:	learn: 0.6778489	total: 355ms	remaining: 3.67s
88:	learn: 0.6740331	total: 358ms	remaining: 3.66s
89:	learn: 0.6704494	total: 361ms	remaining: 3.65s
90:	learn: 0.6661259	total: 364ms	remaining: 3.63s
91:	learn: 0.6626042	total: 367ms	remaining: 3.62s
92:	learn: 0.6590292	total: 370ms	remaining: 3.61s
93:	learn: 0.6554413	total: 373ms	remaining: 3.6s
94:	learn: 0.6526417	total: 376ms	remaining: 3.59s
95:	learn: 0.6490439	total: 379ms	remaining: 3.57s
96:	learn: 0.6467224	total: 383ms	remaining: 3.56s
97:	learn: 0.6432002	total: 386ms	remaining: 3.55s
98:	learn: 0.6412776	total: 389ms	remaining: 3.54s
99:	learn: 0.6389192	total: 393ms	remaining: 3.53s
100:	learn: 0.6359101	total: 396ms	remaining: 3.52s
101:	learn: 0.6322537	total: 399ms	remaining: 3.52s
102:	learn: 0.6295722	total: 403ms	remaining: 3.51s
103:	learn: 0.6266020	total: 407ms	remaining: 3.5s
104:	learn: 0.6242294	total: 410ms	remaining: 3.49s
105:	learn: 0.6217499	total: 413ms	remaining: 3.48s
106:	learn: 0.6204236	total: 417ms	remaining: 3.48s
107:	learn: 0.6179910	total: 421ms	remaining: 3.47s
108:	learn: 0.6154061	total: 424ms	remaining: 3.46s
109:	learn: 0.6137596	total: 427ms	remaining: 3.45s
110:	learn: 0.6117283	total: 430ms	remaining: 3.44s
111:	learn: 0.6098140	total: 433ms	remaining: 3.44s
112:	learn: 0.6075496	total: 437ms	remaining: 3.43s
113:	learn: 0.6058904	total: 440ms	remaining: 3.42s
114:	learn: 0.6038122	total: 443ms	remaining: 3.41s
115:	learn: 0.6021583	total: 446ms	remaining: 3.4s
116:	learn: 0.6007633	total: 449ms	remaining: 3.39s
117:	learn: 0.5989211	total: 453ms	remaining: 3.38s
118:	learn: 0.5972269	total: 456ms	remaining: 3.37s
119:	learn: 0.5953529	total: 459ms	remaining: 3.37s
120:	learn: 0.5937351	total: 462ms	remaining: 3.36s
121:	learn: 0.5917424	total: 465ms	remaining: 3.35s
122:	learn: 0.5902046	total: 468ms	remaining: 3.34s
123:	learn: 0.5883267	total: 472ms	remaining: 3.33s
124:	learn: 0.5870216	total: 475ms	remaining: 3.33s
125:	learn: 0.5853003	total: 478ms	remaining: 3.32s
126:	learn: 0.5843247	total: 481ms	remaining: 3.31s
127:	learn: 0.5823154	total: 485ms	remaining: 3.3s
128:	learn: 0.5809541	total: 488ms	remaining: 3.29s
129:	learn: 0.5789569	total: 491ms	remaining: 3.29s
130:	learn: 0.5773752	total: 494ms	remaining: 3.28s
131:	learn: 0.5752670	total: 498ms	remaining: 3.27s
132:	learn: 0.5737710	total: 501ms	remaining: 3.26s
133:	learn: 0.5722145	total: 504ms	remaining: 3.26s
134:	learn: 0.5706925	total: 507ms	remaining: 3.25s
135:	learn: 0.5696125	total: 510ms	remaining: 3.24s
136:	learn: 0.5684057	total: 513ms	remaining: 3.23s
137:	learn: 0.5668800	total: 517ms	remaining: 3.23s
138:	learn: 0.5652156	total: 521ms	remaining: 3.23s
139:	learn: 0.5641754	total: 525ms	remaining: 3.22s
140:	learn: 0.5627745	total: 528ms	remaining: 3.21s
141:	learn: 0.5621379	total: 531ms	remaining: 3.21s
142:	learn: 0.5609717	total: 534ms	remaining: 3.2s
143:	learn: 0.5595847	total: 538ms	remaining: 3.2s
144:	learn: 0.5582880	total: 541ms	remaining: 3.19s
145:	learn: 0.5573794	total: 544ms	remaining: 3.18s
146:	learn: 0.5561539	total: 547ms	remaining: 3.17s
147:	learn: 0.5548667	total: 551ms	remaining: 3.17s
148:	learn: 0.5533491	total: 554ms	remaining: 3.16s
149:	learn: 0.5519277	total: 557ms	remaining: 3.15s
150:	learn: 0.5510423	total: 560ms	remaining: 3.15s
151:	learn: 0.5500217	total: 563ms	remaining: 3.14s
152:	learn: 0.5485391	total: 567ms	remaining: 3.14s
153:	learn: 0.5472895	total: 570ms	remaining: 3.13s
154:	learn: 0.5464558	total: 573ms	remaining: 3.12s
155:	learn: 0.5456874	total: 576ms	remaining: 3.12s
156:	learn: 0.5443351	total: 579ms	remaining: 3.11s
157:	learn: 0.5431722	total: 582ms	remaining: 3.1s
158:	learn: 0.5420827	total: 586ms	remaining: 3.1s
159:	learn: 0.5404464	total: 589ms	remaining: 3.09s
160:	learn: 0.5394812	total: 593ms	remaining: 3.09s
161:	learn: 0.5383878	total: 596ms	remaining: 3.08s
162:	learn: 0.5371991	total: 600ms	remaining: 3.08s
163:	learn: 0.5360893	total: 604ms	remaining: 3.08s
164:	learn: 0.5348795	total: 607ms	remaining: 3.07s
165:	learn: 0.5339208	total: 610ms	remaining: 3.07s
166:	learn: 0.5325131	total: 613ms	remaining: 3.06s
167:	learn: 0.5314276	total: 617ms	remaining: 3.06s
168:	learn: 0.5303847	total: 621ms	remaining: 3.05s
169:	learn: 0.5294339	total: 625ms	remaining: 3.05s
170:	learn: 0.5284374	total: 629ms	remaining: 3.05s
171:	learn: 0.5275295	total: 632ms	remaining: 3.04s
172:	learn: 0.5267194	total: 636ms	remaining: 3.04s
173:	learn: 0.5256977	total: 639ms	remaining: 3.03s
174:	learn: 0.5240502	total: 642ms	remaining: 3.03s
175:	learn: 0.5231121	total: 645ms	remaining: 3.02s
176:	learn: 0.5219285	total: 649ms	remaining: 3.02s
177:	learn: 0.5212026	total: 653ms	remaining: 3.01s
178:	learn: 0.5201152	total: 656ms	remaining: 3.01s
179:	learn: 0.5191207	total: 659ms	remaining: 3s
180:	learn: 0.5184563	total: 663ms	remaining: 3s
181:	learn: 0.5175407	total: 667ms	remaining: 3s
182:	learn: 0.5164491	total: 670ms	remaining: 2.99s
183:	learn: 0.5152700	total: 673ms	remaining: 2.98s
184:	learn: 0.5144159	total: 676ms	remaining: 2.98s
185:	learn: 0.5133076	total: 680ms	remaining: 2.98s
186:	learn: 0.5120716	total: 683ms	remaining: 2.97s
187:	learn: 0.5112054	total: 687ms	remaining: 2.97s
188:	learn: 0.5100507	total: 690ms	remaining: 2.96s
189:	learn: 0.5090646	total: 693ms	remaining: 2.95s
190:	learn: 0.5079116	total: 697ms	remaining: 2.95s
191:	learn: 0.5068761	total: 700ms	remaining: 2.95s
192:	learn: 0.5059156	total: 703ms	remaining: 2.94s
193:	learn: 0.5046199	total: 707ms	remaining: 2.94s
194:	learn: 0.5036700	total: 711ms	remaining: 2.93s
195:	learn: 0.5030163	total: 714ms	remaining: 2.93s
196:	learn: 0.5021730	total: 717ms	remaining: 2.92s
197:	learn: 0.5013725	total: 720ms	remaining: 2.92s
198:	learn: 0.5003361	total: 723ms	remaining: 2.91s
199:	learn: 0.4992527	total: 726ms	remaining: 2.9s
200:	learn: 0.4982242	total: 730ms	remaining: 2.9s
201:	learn: 0.4969857	total: 733ms	remaining: 2.9s
202:	learn: 0.4964471	total: 736ms	remaining: 2.89s
203:	learn: 0.4955578	total: 740ms	remaining: 2.88s
204:	learn: 0.4948073	total: 743ms	remaining: 2.88s
205:	learn: 0.4938657	total: 746ms	remaining: 2.88s
206:	learn: 0.4927562	total: 750ms	remaining: 2.87s
207:	learn: 0.4922795	total: 753ms	remaining: 2.87s
208:	learn: 0.4912653	total: 756ms	remaining: 2.86s
209:	learn: 0.4901704	total: 759ms	remaining: 2.85s
210:	learn: 0.4890394	total: 763ms	remaining: 2.85s
211:	learn: 0.4880374	total: 766ms	remaining: 2.85s
212:	learn: 0.4869604	total: 769ms	remaining: 2.84s
213:	learn: 0.4857559	total: 773ms	remaining: 2.84s
214:	learn: 0.4849108	total: 776ms	remaining: 2.83s
215:	learn: 0.4840655	total: 780ms	remaining: 2.83s
216:	learn: 0.4832522	total: 783ms	remaining: 2.83s
217:	learn: 0.4823870	total: 786ms	remaining: 2.82s
218:	learn: 0.4811295	total: 789ms	remaining: 2.81s
219:	learn: 0.4800552	total: 793ms	remaining: 2.81s
220:	learn: 0.4795163	total: 796ms	remaining: 2.81s
221:	learn: 0.4786248	total: 800ms	remaining: 2.8s
222:	learn: 0.4778513	total: 804ms	remaining: 2.8s
223:	learn: 0.4771565	total: 807ms	remaining: 2.8s
224:	learn: 0.4763662	total: 811ms	remaining: 2.79s
225:	learn: 0.4752713	total: 815ms	remaining: 2.79s
226:	learn: 0.4744328	total: 818ms	remaining: 2.78s
227:	learn: 0.4736467	total: 821ms	remaining: 2.78s
228:	learn: 0.4726439	total: 825ms	remaining: 2.78s
229:	learn: 0.4720170	total: 828ms	remaining: 2.77s
230:	learn: 0.4709224	total: 832ms	remaining: 2.77s
231:	learn: 0.4703208	total: 835ms	remaining: 2.76s
232:	learn: 0.4694894	total: 838ms	remaining: 2.76s
233:	learn: 0.4687498	total: 841ms	remaining: 2.75s
234:	learn: 0.4679239	total: 845ms	remaining: 2.75s
235:	learn: 0.4670138	total: 848ms	remaining: 2.75s
236:	learn: 0.4663092	total: 851ms	remaining: 2.74s
237:	learn: 0.4655198	total: 855ms	remaining: 2.74s
238:	learn: 0.4644130	total: 858ms	remaining: 2.73s
239:	learn: 0.4633542	total: 862ms	remaining: 2.73s
240:	learn: 0.4624652	total: 865ms	remaining: 2.73s
241:	learn: 0.4612790	total: 869ms	remaining: 2.72s
242:	learn: 0.4603322	total: 873ms	remaining: 2.72s
243:	learn: 0.4595621	total: 876ms	remaining: 2.71s
244:	learn: 0.4590279	total: 879ms	remaining: 2.71s
245:	learn: 0.4581471	total: 882ms	remaining: 2.7s
246:	learn: 0.4575228	total: 886ms	remaining: 2.7s
247:	learn: 0.4573403	total: 889ms	remaining: 2.69s
248:	learn: 0.4564604	total: 892ms	remaining: 2.69s
249:	learn: 0.4556477	total: 896ms	remaining: 2.69s
250:	learn: 0.4546236	total: 899ms	remaining: 2.68s
251:	learn: 0.4533804	total: 902ms	remaining: 2.68s
252:	learn: 0.4528083	total: 906ms	remaining: 2.67s
253:	learn: 0.4522885	total: 909ms	remaining: 2.67s
254:	learn: 0.4516671	total: 912ms	remaining: 2.67s
255:	learn: 0.4509067	total: 916ms	remaining: 2.66s
256:	learn: 0.4502604	total: 919ms	remaining: 2.65s
257:	learn: 0.4496600	total: 922ms	remaining: 2.65s
258:	learn: 0.4490141	total: 925ms	remaining: 2.65s
259:	learn: 0.4483730	total: 928ms	remaining: 2.64s
260:	learn: 0.4476013	total: 931ms	remaining: 2.64s
261:	learn: 0.4464556	total: 934ms	remaining: 2.63s
262:	learn: 0.4460153	total: 938ms	remaining: 2.63s
263:	learn: 0.4452759	total: 941ms	remaining: 2.62s
264:	learn: 0.4448251	total: 945ms	remaining: 2.62s
265:	learn: 0.4439872	total: 948ms	remaining: 2.62s
266:	learn: 0.4432581	total: 951ms	remaining: 2.61s
267:	learn: 0.4425783	total: 955ms	remaining: 2.61s
268:	learn: 0.4420917	total: 958ms	remaining: 2.6s
269:	learn: 0.4414548	total: 961ms	remaining: 2.6s
270:	learn: 0.4406199	total: 965ms	remaining: 2.59s
271:	learn: 0.4401848	total: 968ms	remaining: 2.59s
272:	learn: 0.4396981	total: 971ms	remaining: 2.59s
273:	learn: 0.4391266	total: 974ms	remaining: 2.58s
274:	learn: 0.4389288	total: 978ms	remaining: 2.58s
275:	learn: 0.4383482	total: 981ms	remaining: 2.57s
276:	learn: 0.4374541	total: 984ms	remaining: 2.57s
277:	learn: 0.4366817	total: 988ms	remaining: 2.56s
278:	learn: 0.4361637	total: 991ms	remaining: 2.56s
279:	learn: 0.4355985	total: 995ms	remaining: 2.56s
280:	learn: 0.4349846	total: 998ms	remaining: 2.55s
281:	learn: 0.4343480	total: 1s	remaining: 2.55s
282:	learn: 0.4340070	total: 1s	remaining: 2.54s
283:	learn: 0.4330370	total: 1.01s	remaining: 2.54s
284:	learn: 0.4323418	total: 1.01s	remaining: 2.54s
285:	learn: 0.4320533	total: 1.01s	remaining: 2.53s
286:	learn: 0.4312878	total: 1.02s	remaining: 2.53s
287:	learn: 0.4302917	total: 1.02s	remaining: 2.52s
288:	learn: 0.4296243	total: 1.02s	remaining: 2.52s
289:	learn: 0.4289696	total: 1.03s	remaining: 2.51s
290:	learn: 0.4280839	total: 1.03s	remaining: 2.51s
291:	learn: 0.4276341	total: 1.03s	remaining: 2.51s
292:	learn: 0.4273110	total: 1.04s	remaining: 2.5s
293:	learn: 0.4264972	total: 1.04s	remaining: 2.5s
294:	learn: 0.4260751	total: 1.04s	remaining: 2.49s
295:	learn: 0.4250758	total: 1.05s	remaining: 2.49s
296:	learn: 0.4242247	total: 1.05s	remaining: 2.48s
297:	learn: 0.4237303	total: 1.05s	remaining: 2.48s
298:	learn: 0.4229408	total: 1.06s	remaining: 2.48s
299:	learn: 0.4221990	total: 1.06s	remaining: 2.47s
300:	learn: 0.4216592	total: 1.06s	remaining: 2.47s
301:	learn: 0.4215269	total: 1.06s	remaining: 2.46s
302:	learn: 0.4209301	total: 1.07s	remaining: 2.46s
303:	learn: 0.4204768	total: 1.07s	remaining: 2.45s
304:	learn: 0.4199938	total: 1.07s	remaining: 2.45s
305:	learn: 0.4190819	total: 1.08s	remaining: 2.45s
306:	learn: 0.4186059	total: 1.08s	remaining: 2.44s
307:	learn: 0.4179422	total: 1.08s	remaining: 2.44s
308:	learn: 0.4173496	total: 1.09s	remaining: 2.43s
309:	learn: 0.4165331	total: 1.09s	remaining: 2.43s
310:	learn: 0.4156881	total: 1.09s	remaining: 2.42s
311:	learn: 0.4150547	total: 1.1s	remaining: 2.42s
312:	learn: 0.4144039	total: 1.1s	remaining: 2.42s
313:	learn: 0.4137554	total: 1.1s	remaining: 2.41s
314:	learn: 0.4128423	total: 1.11s	remaining: 2.41s
315:	learn: 0.4121851	total: 1.11s	remaining: 2.4s
316:	learn: 0.4120652	total: 1.11s	remaining: 2.4s
317:	learn: 0.4119489	total: 1.12s	remaining: 2.4s
318:	learn: 0.4113800	total: 1.12s	remaining: 2.39s
319:	learn: 0.4110944	total: 1.12s	remaining: 2.39s
320:	learn: 0.4103885	total: 1.13s	remaining: 2.38s
321:	learn: 0.4098944	total: 1.13s	remaining: 2.38s
322:	learn: 0.4096331	total: 1.13s	remaining: 2.38s
323:	learn: 0.4092385	total: 1.14s	remaining: 2.37s
324:	learn: 0.4088671	total: 1.14s	remaining: 2.37s
325:	learn: 0.4085054	total: 1.14s	remaining: 2.36s
326:	learn: 0.4079698	total: 1.15s	remaining: 2.36s
327:	learn: 0.4075026	total: 1.15s	remaining: 2.35s
328:	learn: 0.4065781	total: 1.15s	remaining: 2.35s
329:	learn: 0.4061529	total: 1.16s	remaining: 2.35s
330:	learn: 0.4057269	total: 1.16s	remaining: 2.34s
331:	learn: 0.4050642	total: 1.16s	remaining: 2.34s
332:	learn: 0.4047541	total: 1.17s	remaining: 2.33s
333:	learn: 0.4044138	total: 1.17s	remaining: 2.34s
334:	learn: 0.4040232	total: 1.17s	remaining: 2.33s
335:	learn: 0.4031800	total: 1.18s	remaining: 2.33s
336:	learn: 0.4026754	total: 1.18s	remaining: 2.32s
337:	learn: 0.4022685	total: 1.19s	remaining: 2.32s
338:	learn: 0.4017881	total: 1.19s	remaining: 2.32s
339:	learn: 0.4015254	total: 1.19s	remaining: 2.31s
340:	learn: 0.4009658	total: 1.2s	remaining: 2.31s
341:	learn: 0.4008354	total: 1.2s	remaining: 2.31s
342:	learn: 0.4002425	total: 1.2s	remaining: 2.3s
343:	learn: 0.3997084	total: 1.21s	remaining: 2.3s
344:	learn: 0.3992679	total: 1.21s	remaining: 2.29s
345:	learn: 0.3989327	total: 1.21s	remaining: 2.29s
346:	learn: 0.3983560	total: 1.22s	remaining: 2.29s
347:	learn: 0.3978115	total: 1.22s	remaining: 2.28s
348:	learn: 0.3972325	total: 1.22s	remaining: 2.28s
349:	learn: 0.3970591	total: 1.23s	remaining: 2.27s
350:	learn: 0.3967434	total: 1.23s	remaining: 2.27s
351:	learn: 0.3964092	total: 1.23s	remaining: 2.27s
352:	learn: 0.3959126	total: 1.24s	remaining: 2.26s
353:	learn: 0.3956668	total: 1.24s	remaining: 2.26s
354:	learn: 0.3952955	total: 1.24s	remaining: 2.25s
355:	learn: 0.3948577	total: 1.25s	remaining: 2.25s
356:	learn: 0.3942103	total: 1.25s	remaining: 2.25s
357:	learn: 0.3934125	total: 1.25s	remaining: 2.24s
358:	learn: 0.3929393	total: 1.25s	remaining: 2.24s
359:	learn: 0.3923515	total: 1.26s	remaining: 2.24s
360:	learn: 0.3918921	total: 1.26s	remaining: 2.23s
361:	learn: 0.3913288	total: 1.26s	remaining: 2.23s
362:	learn: 0.3909709	total: 1.27s	remaining: 2.22s
363:	learn: 0.3907805	total: 1.27s	remaining: 2.22s
364:	learn: 0.3902974	total: 1.27s	remaining: 2.22s
365:	learn: 0.3897694	total: 1.28s	remaining: 2.21s
366:	learn: 0.3895400	total: 1.28s	remaining: 2.21s
367:	learn: 0.3887777	total: 1.28s	remaining: 2.2s
368:	learn: 0.3879556	total: 1.29s	remaining: 2.2s
369:	learn: 0.3872718	total: 1.29s	remaining: 2.2s
370:	learn: 0.3866534	total: 1.29s	remaining: 2.19s
371:	learn: 0.3859462	total: 1.3s	remaining: 2.19s
372:	learn: 0.3853502	total: 1.3s	remaining: 2.19s
373:	learn: 0.3851891	total: 1.3s	remaining: 2.18s
374:	learn: 0.3846603	total: 1.31s	remaining: 2.18s
375:	learn: 0.3844259	total: 1.31s	remaining: 2.17s
376:	learn: 0.3838207	total: 1.31s	remaining: 2.17s
377:	learn: 0.3833191	total: 1.32s	remaining: 2.17s
378:	learn: 0.3826555	total: 1.32s	remaining: 2.16s
379:	learn: 0.3825492	total: 1.32s	remaining: 2.16s
380:	learn: 0.3819978	total: 1.33s	remaining: 2.15s
381:	learn: 0.3819036	total: 1.33s	remaining: 2.15s
382:	learn: 0.3814676	total: 1.33s	remaining: 2.15s
383:	learn: 0.3806646	total: 1.34s	remaining: 2.14s
384:	learn: 0.3801625	total: 1.34s	remaining: 2.14s
385:	learn: 0.3795941	total: 1.34s	remaining: 2.13s
386:	learn: 0.3792967	total: 1.34s	remaining: 2.13s
387:	learn: 0.3786377	total: 1.35s	remaining: 2.13s
388:	learn: 0.3781177	total: 1.35s	remaining: 2.12s
389:	learn: 0.3776432	total: 1.35s	remaining: 2.12s
390:	learn: 0.3771742	total: 1.36s	remaining: 2.12s
391:	learn: 0.3767596	total: 1.36s	remaining: 2.11s
392:	learn: 0.3762086	total: 1.36s	remaining: 2.11s
393:	learn: 0.3758127	total: 1.37s	remaining: 2.1s
394:	learn: 0.3752773	total: 1.37s	remaining: 2.1s
395:	learn: 0.3748929	total: 1.37s	remaining: 2.1s
396:	learn: 0.3744049	total: 1.38s	remaining: 2.09s
397:	learn: 0.3742496	total: 1.38s	remaining: 2.09s
398:	learn: 0.3737786	total: 1.39s	remaining: 2.09s
399:	learn: 0.3734626	total: 1.39s	remaining: 2.08s
400:	learn: 0.3730718	total: 1.39s	remaining: 2.08s
401:	learn: 0.3724141	total: 1.39s	remaining: 2.07s
402:	learn: 0.3719930	total: 1.4s	remaining: 2.07s
403:	learn: 0.3716086	total: 1.4s	remaining: 2.07s
404:	learn: 0.3709842	total: 1.4s	remaining: 2.06s
405:	learn: 0.3704663	total: 1.41s	remaining: 2.06s
406:	learn: 0.3699775	total: 1.41s	remaining: 2.06s
407:	learn: 0.3698619	total: 1.41s	remaining: 2.05s
408:	learn: 0.3694625	total: 1.42s	remaining: 2.05s
409:	learn: 0.3688800	total: 1.42s	remaining: 2.04s
410:	learn: 0.3687014	total: 1.42s	remaining: 2.04s
411:	learn: 0.3682739	total: 1.43s	remaining: 2.04s
412:	learn: 0.3678543	total: 1.43s	remaining: 2.03s
413:	learn: 0.3672088	total: 1.43s	remaining: 2.03s
414:	learn: 0.3667914	total: 1.44s	remaining: 2.02s
415:	learn: 0.3666853	total: 1.44s	remaining: 2.02s
416:	learn: 0.3664491	total: 1.44s	remaining: 2.02s
417:	learn: 0.3658085	total: 1.45s	remaining: 2.01s
418:	learn: 0.3655759	total: 1.45s	remaining: 2.01s
419:	learn: 0.3650955	total: 1.45s	remaining: 2.01s
420:	learn: 0.3646143	total: 1.46s	remaining: 2s
421:	learn: 0.3643238	total: 1.46s	remaining: 2s
422:	learn: 0.3640482	total: 1.46s	remaining: 2s
423:	learn: 0.3638433	total: 1.47s	remaining: 1.99s
424:	learn: 0.3634363	total: 1.47s	remaining: 1.99s
425:	learn: 0.3627563	total: 1.47s	remaining: 1.98s
426:	learn: 0.3620870	total: 1.48s	remaining: 1.98s
427:	learn: 0.3615678	total: 1.48s	remaining: 1.98s
428:	learn: 0.3608775	total: 1.48s	remaining: 1.97s
429:	learn: 0.3605767	total: 1.48s	remaining: 1.97s
430:	learn: 0.3600947	total: 1.49s	remaining: 1.96s
431:	learn: 0.3599022	total: 1.49s	remaining: 1.96s
432:	learn: 0.3592169	total: 1.49s	remaining: 1.96s
433:	learn: 0.3586974	total: 1.5s	remaining: 1.95s
434:	learn: 0.3583809	total: 1.5s	remaining: 1.95s
435:	learn: 0.3580570	total: 1.5s	remaining: 1.95s
436:	learn: 0.3579281	total: 1.51s	remaining: 1.94s
437:	learn: 0.3572908	total: 1.51s	remaining: 1.94s
438:	learn: 0.3567620	total: 1.51s	remaining: 1.93s
439:	learn: 0.3563369	total: 1.52s	remaining: 1.93s
440:	learn: 0.3559366	total: 1.52s	remaining: 1.93s
441:	learn: 0.3555465	total: 1.52s	remaining: 1.92s
442:	learn: 0.3551928	total: 1.53s	remaining: 1.92s
443:	learn: 0.3546176	total: 1.53s	remaining: 1.92s
444:	learn: 0.3542245	total: 1.53s	remaining: 1.91s
445:	learn: 0.3537539	total: 1.54s	remaining: 1.91s
446:	learn: 0.3533939	total: 1.54s	remaining: 1.91s
447:	learn: 0.3530292	total: 1.54s	remaining: 1.9s
448:	learn: 0.3527625	total: 1.55s	remaining: 1.9s
449:	learn: 0.3523223	total: 1.55s	remaining: 1.89s
450:	learn: 0.3522093	total: 1.55s	remaining: 1.89s
451:	learn: 0.3515991	total: 1.56s	remaining: 1.89s
452:	learn: 0.3513537	total: 1.56s	remaining: 1.88s
453:	learn: 0.3508370	total: 1.56s	remaining: 1.88s
454:	learn: 0.3505712	total: 1.57s	remaining: 1.88s
455:	learn: 0.3500935	total: 1.57s	remaining: 1.87s
456:	learn: 0.3497203	total: 1.57s	remaining: 1.87s
457:	learn: 0.3493516	total: 1.58s	remaining: 1.87s
458:	learn: 0.3493006	total: 1.58s	remaining: 1.86s
459:	learn: 0.3492219	total: 1.58s	remaining: 1.86s
460:	learn: 0.3486440	total: 1.59s	remaining: 1.86s
461:	learn: 0.3481056	total: 1.59s	remaining: 1.85s
462:	learn: 0.3476578	total: 1.59s	remaining: 1.85s
463:	learn: 0.3472720	total: 1.6s	remaining: 1.85s
464:	learn: 0.3469809	total: 1.6s	remaining: 1.84s
465:	learn: 0.3464662	total: 1.6s	remaining: 1.84s
466:	learn: 0.3461573	total: 1.61s	remaining: 1.83s
467:	learn: 0.3455262	total: 1.61s	remaining: 1.83s
468:	learn: 0.3450315	total: 1.61s	remaining: 1.83s
469:	learn: 0.3447621	total: 1.62s	remaining: 1.82s
470:	learn: 0.3443707	total: 1.62s	remaining: 1.82s
471:	learn: 0.3440078	total: 1.62s	remaining: 1.82s
472:	learn: 0.3436588	total: 1.63s	remaining: 1.81s
473:	learn: 0.3434317	total: 1.63s	remaining: 1.81s
474:	learn: 0.3429587	total: 1.63s	remaining: 1.81s
475:	learn: 0.3427129	total: 1.64s	remaining: 1.8s
476:	learn: 0.3426614	total: 1.64s	remaining: 1.8s
477:	learn: 0.3424276	total: 1.65s	remaining: 1.8s
478:	learn: 0.3419850	total: 1.65s	remaining: 1.79s
479:	learn: 0.3414828	total: 1.65s	remaining: 1.79s
480:	learn: 0.3414148	total: 1.65s	remaining: 1.78s
481:	learn: 0.3411039	total: 1.66s	remaining: 1.78s
482:	learn: 0.3404094	total: 1.66s	remaining: 1.78s
483:	learn: 0.3400870	total: 1.66s	remaining: 1.77s
484:	learn: 0.3397870	total: 1.67s	remaining: 1.77s
485:	learn: 0.3395923	total: 1.67s	remaining: 1.77s
486:	learn: 0.3393283	total: 1.68s	remaining: 1.76s
487:	learn: 0.3390542	total: 1.68s	remaining: 1.76s
488:	learn: 0.3387735	total: 1.68s	remaining: 1.76s
489:	learn: 0.3384992	total: 1.69s	remaining: 1.75s
490:	learn: 0.3382679	total: 1.69s	remaining: 1.75s
491:	learn: 0.3379591	total: 1.69s	remaining: 1.75s
492:	learn: 0.3375240	total: 1.7s	remaining: 1.74s
493:	learn: 0.3371663	total: 1.7s	remaining: 1.74s
494:	learn: 0.3368073	total: 1.7s	remaining: 1.74s
495:	learn: 0.3364258	total: 1.71s	remaining: 1.73s
496:	learn: 0.3357624	total: 1.71s	remaining: 1.73s
497:	learn: 0.3353391	total: 1.71s	remaining: 1.73s
498:	learn: 0.3351359	total: 1.72s	remaining: 1.72s
499:	learn: 0.3347721	total: 1.72s	remaining: 1.72s
500:	learn: 0.3344689	total: 1.72s	remaining: 1.72s
501:	learn: 0.3338885	total: 1.73s	remaining: 1.71s
502:	learn: 0.3335304	total: 1.73s	remaining: 1.71s
503:	learn: 0.3333156	total: 1.73s	remaining: 1.7s
504:	learn: 0.3326858	total: 1.74s	remaining: 1.7s
505:	learn: 0.3322794	total: 1.74s	remaining: 1.7s
506:	learn: 0.3317957	total: 1.74s	remaining: 1.69s
507:	learn: 0.3315374	total: 1.75s	remaining: 1.69s
508:	learn: 0.3310346	total: 1.75s	remaining: 1.69s
509:	learn: 0.3308218	total: 1.75s	remaining: 1.68s
510:	learn: 0.3305515	total: 1.76s	remaining: 1.68s
511:	learn: 0.3303324	total: 1.76s	remaining: 1.68s
512:	learn: 0.3298027	total: 1.76s	remaining: 1.67s
513:	learn: 0.3295265	total: 1.77s	remaining: 1.67s
514:	learn: 0.3292161	total: 1.77s	remaining: 1.67s
515:	learn: 0.3289082	total: 1.77s	remaining: 1.67s
516:	learn: 0.3284967	total: 1.78s	remaining: 1.66s
517:	learn: 0.3280931	total: 1.78s	remaining: 1.66s
518:	learn: 0.3277610	total: 1.79s	remaining: 1.66s
519:	learn: 0.3273487	total: 1.79s	remaining: 1.65s
520:	learn: 0.3270085	total: 1.79s	remaining: 1.65s
521:	learn: 0.3267775	total: 1.8s	remaining: 1.65s
522:	learn: 0.3265932	total: 1.8s	remaining: 1.64s
523:	learn: 0.3262778	total: 1.8s	remaining: 1.64s
524:	learn: 0.3256224	total: 1.81s	remaining: 1.64s
525:	learn: 0.3253104	total: 1.81s	remaining: 1.63s
526:	learn: 0.3252620	total: 1.81s	remaining: 1.63s
527:	learn: 0.3248235	total: 1.82s	remaining: 1.63s
528:	learn: 0.3246449	total: 1.82s	remaining: 1.62s
529:	learn: 0.3241613	total: 1.82s	remaining: 1.62s
530:	learn: 0.3239438	total: 1.83s	remaining: 1.61s
531:	learn: 0.3235460	total: 1.83s	remaining: 1.61s
532:	learn: 0.3228695	total: 1.84s	remaining: 1.61s
533:	learn: 0.3226564	total: 1.84s	remaining: 1.6s
534:	learn: 0.3223583	total: 1.84s	remaining: 1.6s
535:	learn: 0.3221698	total: 1.85s	remaining: 1.6s
536:	learn: 0.3216386	total: 1.85s	remaining: 1.59s
537:	learn: 0.3208786	total: 1.85s	remaining: 1.59s
538:	learn: 0.3206405	total: 1.86s	remaining: 1.59s
539:	learn: 0.3200470	total: 1.86s	remaining: 1.58s
540:	learn: 0.3194599	total: 1.86s	remaining: 1.58s
541:	learn: 0.3191917	total: 1.87s	remaining: 1.58s
542:	learn: 0.3187413	total: 1.87s	remaining: 1.57s
543:	learn: 0.3183845	total: 1.87s	remaining: 1.57s
544:	learn: 0.3181347	total: 1.88s	remaining: 1.57s
545:	learn: 0.3179402	total: 1.88s	remaining: 1.56s
546:	learn: 0.3176041	total: 1.88s	remaining: 1.56s
547:	learn: 0.3171493	total: 1.89s	remaining: 1.56s
548:	learn: 0.3167678	total: 1.89s	remaining: 1.55s
549:	learn: 0.3161866	total: 1.89s	remaining: 1.55s
550:	learn: 0.3158943	total: 1.9s	remaining: 1.54s
551:	learn: 0.3155500	total: 1.9s	remaining: 1.54s
552:	learn: 0.3151446	total: 1.9s	remaining: 1.54s
553:	learn: 0.3150676	total: 1.91s	remaining: 1.53s
554:	learn: 0.3148019	total: 1.91s	remaining: 1.53s
555:	learn: 0.3146087	total: 1.91s	remaining: 1.53s
556:	learn: 0.3145459	total: 1.92s	remaining: 1.52s
557:	learn: 0.3144546	total: 1.92s	remaining: 1.52s
558:	learn: 0.3140608	total: 1.92s	remaining: 1.52s
559:	learn: 0.3137361	total: 1.93s	remaining: 1.51s
560:	learn: 0.3134164	total: 1.93s	remaining: 1.51s
561:	learn: 0.3133328	total: 1.93s	remaining: 1.51s
562:	learn: 0.3129987	total: 1.94s	remaining: 1.5s
563:	learn: 0.3127488	total: 1.94s	remaining: 1.5s
564:	learn: 0.3124070	total: 1.94s	remaining: 1.5s
565:	learn: 0.3120479	total: 1.95s	remaining: 1.49s
566:	learn: 0.3118329	total: 1.95s	remaining: 1.49s
567:	learn: 0.3115109	total: 1.95s	remaining: 1.49s
568:	learn: 0.3112201	total: 1.96s	remaining: 1.48s
569:	learn: 0.3108064	total: 1.96s	remaining: 1.48s
570:	learn: 0.3105657	total: 1.96s	remaining: 1.47s
571:	learn: 0.3102145	total: 1.97s	remaining: 1.47s
572:	learn: 0.3097385	total: 1.97s	remaining: 1.47s
573:	learn: 0.3093835	total: 1.97s	remaining: 1.47s
574:	learn: 0.3090351	total: 1.98s	remaining: 1.46s
575:	learn: 0.3087266	total: 1.98s	remaining: 1.46s
576:	learn: 0.3085897	total: 1.98s	remaining: 1.45s
577:	learn: 0.3083184	total: 1.99s	remaining: 1.45s
578:	learn: 0.3082340	total: 1.99s	remaining: 1.45s
579:	learn: 0.3076160	total: 1.99s	remaining: 1.44s
580:	learn: 0.3073862	total: 2s	remaining: 1.44s
581:	learn: 0.3070794	total: 2s	remaining: 1.44s
582:	learn: 0.3069234	total: 2s	remaining: 1.43s
583:	learn: 0.3068346	total: 2.01s	remaining: 1.43s
584:	learn: 0.3065929	total: 2.01s	remaining: 1.43s
585:	learn: 0.3063195	total: 2.01s	remaining: 1.42s
586:	learn: 0.3059346	total: 2.02s	remaining: 1.42s
587:	learn: 0.3056757	total: 2.02s	remaining: 1.42s
588:	learn: 0.3053577	total: 2.02s	remaining: 1.41s
589:	learn: 0.3049893	total: 2.03s	remaining: 1.41s
590:	learn: 0.3047812	total: 2.03s	remaining: 1.4s
591:	learn: 0.3046023	total: 2.03s	remaining: 1.4s
592:	learn: 0.3042608	total: 2.04s	remaining: 1.4s
593:	learn: 0.3039681	total: 2.04s	remaining: 1.39s
594:	learn: 0.3035661	total: 2.04s	remaining: 1.39s
595:	learn: 0.3033736	total: 2.05s	remaining: 1.39s
596:	learn: 0.3029874	total: 2.05s	remaining: 1.38s
597:	learn: 0.3023723	total: 2.05s	remaining: 1.38s
598:	learn: 0.3020138	total: 2.06s	remaining: 1.38s
599:	learn: 0.3017719	total: 2.06s	remaining: 1.37s
600:	learn: 0.3015049	total: 2.06s	remaining: 1.37s
601:	learn: 0.3011821	total: 2.06s	remaining: 1.36s
602:	learn: 0.3009602	total: 2.07s	remaining: 1.36s
603:	learn: 0.3005602	total: 2.07s	remaining: 1.36s
604:	learn: 0.3004219	total: 2.08s	remaining: 1.35s
605:	learn: 0.3002263	total: 2.08s	remaining: 1.35s
606:	learn: 0.2999867	total: 2.08s	remaining: 1.35s
607:	learn: 0.2994174	total: 2.08s	remaining: 1.34s
608:	learn: 0.2991720	total: 2.09s	remaining: 1.34s
609:	learn: 0.2987068	total: 2.09s	remaining: 1.34s
610:	learn: 0.2985418	total: 2.09s	remaining: 1.33s
611:	learn: 0.2981738	total: 2.1s	remaining: 1.33s
612:	learn: 0.2979976	total: 2.1s	remaining: 1.33s
613:	learn: 0.2976747	total: 2.1s	remaining: 1.32s
614:	learn: 0.2973307	total: 2.11s	remaining: 1.32s
615:	learn: 0.2970624	total: 2.11s	remaining: 1.31s
616:	learn: 0.2967306	total: 2.11s	remaining: 1.31s
617:	learn: 0.2964076	total: 2.12s	remaining: 1.31s
618:	learn: 0.2962058	total: 2.12s	remaining: 1.3s
619:	learn: 0.2957920	total: 2.12s	remaining: 1.3s
620:	learn: 0.2954920	total: 2.13s	remaining: 1.3s
621:	learn: 0.2950938	total: 2.13s	remaining: 1.29s
622:	learn: 0.2948042	total: 2.13s	remaining: 1.29s
623:	learn: 0.2943325	total: 2.13s	remaining: 1.29s
624:	learn: 0.2940090	total: 2.14s	remaining: 1.28s
625:	learn: 0.2934846	total: 2.14s	remaining: 1.28s
626:	learn: 0.2930588	total: 2.15s	remaining: 1.28s
627:	learn: 0.2928589	total: 2.15s	remaining: 1.27s
628:	learn: 0.2924964	total: 2.15s	remaining: 1.27s
629:	learn: 0.2924495	total: 2.15s	remaining: 1.26s
630:	learn: 0.2920443	total: 2.16s	remaining: 1.26s
631:	learn: 0.2916802	total: 2.16s	remaining: 1.26s
632:	learn: 0.2914135	total: 2.17s	remaining: 1.25s
633:	learn: 0.2912321	total: 2.17s	remaining: 1.25s
634:	learn: 0.2909848	total: 2.17s	remaining: 1.25s
635:	learn: 0.2904671	total: 2.17s	remaining: 1.25s
636:	learn: 0.2902028	total: 2.18s	remaining: 1.24s
637:	learn: 0.2898836	total: 2.18s	remaining: 1.24s
638:	learn: 0.2895101	total: 2.19s	remaining: 1.23s
639:	learn: 0.2892208	total: 2.19s	remaining: 1.23s
640:	learn: 0.2891015	total: 2.19s	remaining: 1.23s
641:	learn: 0.2888759	total: 2.19s	remaining: 1.22s
642:	learn: 0.2887283	total: 2.2s	remaining: 1.22s
643:	learn: 0.2885609	total: 2.2s	remaining: 1.22s
644:	learn: 0.2881727	total: 2.2s	remaining: 1.21s
645:	learn: 0.2877596	total: 2.21s	remaining: 1.21s
646:	learn: 0.2874737	total: 2.21s	remaining: 1.21s
647:	learn: 0.2870647	total: 2.21s	remaining: 1.2s
648:	learn: 0.2865679	total: 2.22s	remaining: 1.2s
649:	learn: 0.2863326	total: 2.22s	remaining: 1.2s
650:	learn: 0.2861018	total: 2.22s	remaining: 1.19s
651:	learn: 0.2857576	total: 2.23s	remaining: 1.19s
652:	learn: 0.2853869	total: 2.23s	remaining: 1.18s
653:	learn: 0.2851109	total: 2.23s	remaining: 1.18s
654:	learn: 0.2848755	total: 2.23s	remaining: 1.18s
655:	learn: 0.2847261	total: 2.24s	remaining: 1.17s
656:	learn: 0.2844077	total: 2.24s	remaining: 1.17s
657:	learn: 0.2840529	total: 2.25s	remaining: 1.17s
658:	learn: 0.2838391	total: 2.25s	remaining: 1.16s
659:	learn: 0.2833714	total: 2.25s	remaining: 1.16s
660:	learn: 0.2831854	total: 2.25s	remaining: 1.16s
661:	learn: 0.2831066	total: 2.26s	remaining: 1.15s
662:	learn: 0.2827877	total: 2.26s	remaining: 1.15s
663:	learn: 0.2825169	total: 2.26s	remaining: 1.15s
664:	learn: 0.2819040	total: 2.27s	remaining: 1.14s
665:	learn: 0.2815437	total: 2.27s	remaining: 1.14s
666:	learn: 0.2812914	total: 2.27s	remaining: 1.14s
667:	learn: 0.2809317	total: 2.28s	remaining: 1.13s
668:	learn: 0.2805022	total: 2.28s	remaining: 1.13s
669:	learn: 0.2802108	total: 2.28s	remaining: 1.12s
670:	learn: 0.2800260	total: 2.29s	remaining: 1.12s
671:	learn: 0.2796513	total: 2.29s	remaining: 1.12s
672:	learn: 0.2793144	total: 2.29s	remaining: 1.11s
673:	learn: 0.2787577	total: 2.3s	remaining: 1.11s
674:	learn: 0.2786898	total: 2.3s	remaining: 1.11s
675:	learn: 0.2783373	total: 2.3s	remaining: 1.1s
676:	learn: 0.2780719	total: 2.31s	remaining: 1.1s
677:	learn: 0.2778878	total: 2.31s	remaining: 1.1s
678:	learn: 0.2775800	total: 2.31s	remaining: 1.09s
679:	learn: 0.2774185	total: 2.31s	remaining: 1.09s
680:	learn: 0.2773129	total: 2.32s	remaining: 1.08s
681:	learn: 0.2770925	total: 2.32s	remaining: 1.08s
682:	learn: 0.2768828	total: 2.32s	remaining: 1.08s
683:	learn: 0.2768476	total: 2.33s	remaining: 1.07s
684:	learn: 0.2767953	total: 2.33s	remaining: 1.07s
685:	learn: 0.2764390	total: 2.33s	remaining: 1.07s
686:	learn: 0.2762438	total: 2.34s	remaining: 1.06s
687:	learn: 0.2758780	total: 2.34s	remaining: 1.06s
688:	learn: 0.2755027	total: 2.34s	remaining: 1.06s
689:	learn: 0.2751770	total: 2.35s	remaining: 1.05s
690:	learn: 0.2747903	total: 2.35s	remaining: 1.05s
691:	learn: 0.2744635	total: 2.35s	remaining: 1.05s
692:	learn: 0.2740074	total: 2.36s	remaining: 1.04s
693:	learn: 0.2739810	total: 2.36s	remaining: 1.04s
694:	learn: 0.2736186	total: 2.36s	remaining: 1.04s
695:	learn: 0.2734820	total: 2.37s	remaining: 1.03s
696:	learn: 0.2732142	total: 2.37s	remaining: 1.03s
697:	learn: 0.2729981	total: 2.37s	remaining: 1.03s
698:	learn: 0.2728261	total: 2.38s	remaining: 1.02s
699:	learn: 0.2725844	total: 2.38s	remaining: 1.02s
700:	learn: 0.2721828	total: 2.38s	remaining: 1.01s
701:	learn: 0.2719829	total: 2.38s	remaining: 1.01s
702:	learn: 0.2719228	total: 2.39s	remaining: 1.01s
703:	learn: 0.2716038	total: 2.39s	remaining: 1s
704:	learn: 0.2714065	total: 2.39s	remaining: 1s
705:	learn: 0.2710144	total: 2.4s	remaining: 998ms
706:	learn: 0.2709227	total: 2.4s	remaining: 995ms
707:	learn: 0.2707329	total: 2.4s	remaining: 991ms
708:	learn: 0.2704630	total: 2.41s	remaining: 988ms
709:	learn: 0.2702448	total: 2.41s	remaining: 984ms
710:	learn: 0.2700055	total: 2.41s	remaining: 981ms
711:	learn: 0.2694917	total: 2.42s	remaining: 977ms
712:	learn: 0.2693000	total: 2.42s	remaining: 974ms
713:	learn: 0.2690283	total: 2.42s	remaining: 970ms
714:	learn: 0.2687780	total: 2.42s	remaining: 967ms
715:	learn: 0.2687384	total: 2.43s	remaining: 963ms
716:	learn: 0.2685078	total: 2.43s	remaining: 960ms
717:	learn: 0.2681714	total: 2.43s	remaining: 956ms
718:	learn: 0.2679557	total: 2.44s	remaining: 953ms
719:	learn: 0.2678159	total: 2.44s	remaining: 949ms
720:	learn: 0.2676483	total: 2.44s	remaining: 946ms
721:	learn: 0.2674793	total: 2.45s	remaining: 942ms
722:	learn: 0.2669124	total: 2.45s	remaining: 939ms
723:	learn: 0.2666120	total: 2.45s	remaining: 935ms
724:	learn: 0.2665815	total: 2.46s	remaining: 932ms
725:	learn: 0.2662691	total: 2.46s	remaining: 928ms
726:	learn: 0.2661253	total: 2.46s	remaining: 925ms
727:	learn: 0.2659665	total: 2.47s	remaining: 922ms
728:	learn: 0.2656264	total: 2.47s	remaining: 918ms
729:	learn: 0.2654130	total: 2.47s	remaining: 915ms
730:	learn: 0.2652073	total: 2.48s	remaining: 911ms
731:	learn: 0.2649612	total: 2.48s	remaining: 908ms
732:	learn: 0.2648599	total: 2.48s	remaining: 905ms
733:	learn: 0.2646590	total: 2.49s	remaining: 901ms
734:	learn: 0.2644626	total: 2.49s	remaining: 898ms
735:	learn: 0.2641833	total: 2.49s	remaining: 894ms
736:	learn: 0.2639876	total: 2.5s	remaining: 891ms
737:	learn: 0.2637405	total: 2.5s	remaining: 887ms
738:	learn: 0.2636841	total: 2.5s	remaining: 884ms
739:	learn: 0.2632669	total: 2.51s	remaining: 881ms
740:	learn: 0.2629340	total: 2.51s	remaining: 877ms
741:	learn: 0.2624565	total: 2.51s	remaining: 874ms
742:	learn: 0.2621763	total: 2.52s	remaining: 871ms
743:	learn: 0.2620465	total: 2.52s	remaining: 867ms
744:	learn: 0.2617521	total: 2.52s	remaining: 864ms
745:	learn: 0.2616214	total: 2.53s	remaining: 860ms
746:	learn: 0.2613156	total: 2.53s	remaining: 857ms
747:	learn: 0.2610295	total: 2.53s	remaining: 854ms
748:	learn: 0.2605249	total: 2.54s	remaining: 850ms
749:	learn: 0.2602300	total: 2.54s	remaining: 847ms
750:	learn: 0.2598565	total: 2.54s	remaining: 843ms
751:	learn: 0.2595145	total: 2.55s	remaining: 840ms
752:	learn: 0.2590396	total: 2.55s	remaining: 837ms
753:	learn: 0.2586583	total: 2.55s	remaining: 833ms
754:	learn: 0.2583753	total: 2.56s	remaining: 830ms
755:	learn: 0.2581020	total: 2.56s	remaining: 827ms
756:	learn: 0.2576964	total: 2.56s	remaining: 823ms
757:	learn: 0.2576649	total: 2.57s	remaining: 820ms
758:	learn: 0.2572911	total: 2.57s	remaining: 816ms
759:	learn: 0.2572575	total: 2.57s	remaining: 813ms
760:	learn: 0.2569378	total: 2.58s	remaining: 810ms
761:	learn: 0.2567645	total: 2.58s	remaining: 806ms
762:	learn: 0.2565097	total: 2.58s	remaining: 803ms
763:	learn: 0.2560526	total: 2.59s	remaining: 799ms
764:	learn: 0.2560135	total: 2.59s	remaining: 796ms
765:	learn: 0.2555596	total: 2.59s	remaining: 792ms
766:	learn: 0.2553998	total: 2.6s	remaining: 789ms
767:	learn: 0.2551371	total: 2.6s	remaining: 786ms
768:	learn: 0.2550625	total: 2.6s	remaining: 782ms
769:	learn: 0.2547166	total: 2.61s	remaining: 779ms
770:	learn: 0.2545018	total: 2.61s	remaining: 775ms
771:	learn: 0.2541630	total: 2.61s	remaining: 772ms
772:	learn: 0.2537533	total: 2.62s	remaining: 768ms
773:	learn: 0.2536258	total: 2.62s	remaining: 765ms
774:	learn: 0.2536134	total: 2.62s	remaining: 762ms
775:	learn: 0.2535560	total: 2.63s	remaining: 758ms
776:	learn: 0.2533754	total: 2.63s	remaining: 755ms
777:	learn: 0.2529591	total: 2.63s	remaining: 751ms
778:	learn: 0.2527568	total: 2.64s	remaining: 748ms
779:	learn: 0.2523681	total: 2.64s	remaining: 744ms
780:	learn: 0.2519061	total: 2.64s	remaining: 741ms
781:	learn: 0.2516554	total: 2.65s	remaining: 738ms
782:	learn: 0.2516155	total: 2.65s	remaining: 734ms
783:	learn: 0.2514085	total: 2.65s	remaining: 731ms
784:	learn: 0.2512819	total: 2.65s	remaining: 727ms
785:	learn: 0.2511030	total: 2.66s	remaining: 724ms
786:	learn: 0.2508719	total: 2.66s	remaining: 721ms
787:	learn: 0.2507493	total: 2.67s	remaining: 717ms
788:	learn: 0.2506815	total: 2.67s	remaining: 714ms
789:	learn: 0.2506345	total: 2.67s	remaining: 710ms
790:	learn: 0.2503126	total: 2.67s	remaining: 707ms
791:	learn: 0.2500426	total: 2.68s	remaining: 703ms
792:	learn: 0.2498370	total: 2.68s	remaining: 700ms
793:	learn: 0.2496517	total: 2.68s	remaining: 696ms
794:	learn: 0.2492671	total: 2.69s	remaining: 693ms
795:	learn: 0.2490305	total: 2.69s	remaining: 690ms
796:	learn: 0.2487405	total: 2.69s	remaining: 686ms
797:	learn: 0.2485630	total: 2.7s	remaining: 683ms
798:	learn: 0.2485411	total: 2.7s	remaining: 679ms
799:	learn: 0.2482743	total: 2.7s	remaining: 676ms
800:	learn: 0.2479895	total: 2.71s	remaining: 672ms
801:	learn: 0.2476468	total: 2.71s	remaining: 669ms
802:	learn: 0.2476272	total: 2.71s	remaining: 666ms
803:	learn: 0.2474233	total: 2.72s	remaining: 662ms
804:	learn: 0.2471691	total: 2.72s	remaining: 659ms
805:	learn: 0.2470093	total: 2.72s	remaining: 656ms
806:	learn: 0.2466640	total: 2.73s	remaining: 653ms
807:	learn: 0.2464114	total: 2.74s	remaining: 650ms
808:	learn: 0.2462497	total: 2.74s	remaining: 648ms
809:	learn: 0.2458715	total: 2.75s	remaining: 644ms
810:	learn: 0.2455936	total: 2.75s	remaining: 641ms
811:	learn: 0.2454435	total: 2.76s	remaining: 638ms
812:	learn: 0.2452169	total: 2.76s	remaining: 635ms
813:	learn: 0.2451734	total: 2.76s	remaining: 631ms
814:	learn: 0.2448680	total: 2.77s	remaining: 628ms
815:	learn: 0.2448261	total: 2.77s	remaining: 625ms
816:	learn: 0.2443975	total: 2.77s	remaining: 621ms
817:	learn: 0.2443746	total: 2.78s	remaining: 618ms
818:	learn: 0.2440732	total: 2.78s	remaining: 614ms
819:	learn: 0.2439608	total: 2.78s	remaining: 611ms
820:	learn: 0.2438886	total: 2.79s	remaining: 607ms
821:	learn: 0.2437090	total: 2.79s	remaining: 604ms
822:	learn: 0.2436873	total: 2.79s	remaining: 601ms
823:	learn: 0.2434946	total: 2.8s	remaining: 597ms
824:	learn: 0.2431038	total: 2.8s	remaining: 594ms
825:	learn: 0.2428742	total: 2.8s	remaining: 590ms
826:	learn: 0.2425255	total: 2.81s	remaining: 587ms
827:	learn: 0.2423590	total: 2.81s	remaining: 584ms
828:	learn: 0.2418667	total: 2.81s	remaining: 580ms
829:	learn: 0.2415327	total: 2.81s	remaining: 577ms
830:	learn: 0.2412411	total: 2.82s	remaining: 573ms
831:	learn: 0.2408410	total: 2.82s	remaining: 570ms
832:	learn: 0.2407998	total: 2.83s	remaining: 566ms
833:	learn: 0.2406402	total: 2.83s	remaining: 563ms
834:	learn: 0.2406208	total: 2.83s	remaining: 560ms
835:	learn: 0.2405666	total: 2.83s	remaining: 556ms
836:	learn: 0.2403029	total: 2.84s	remaining: 553ms
837:	learn: 0.2401250	total: 2.84s	remaining: 549ms
838:	learn: 0.2401027	total: 2.84s	remaining: 546ms
839:	learn: 0.2398424	total: 2.85s	remaining: 542ms
840:	learn: 0.2398274	total: 2.85s	remaining: 539ms
841:	learn: 0.2395215	total: 2.85s	remaining: 536ms
842:	learn: 0.2392416	total: 2.86s	remaining: 532ms
843:	learn: 0.2390250	total: 2.86s	remaining: 529ms
844:	learn: 0.2388586	total: 2.86s	remaining: 525ms
845:	learn: 0.2386279	total: 2.87s	remaining: 522ms
846:	learn: 0.2384384	total: 2.87s	remaining: 518ms
847:	learn: 0.2383070	total: 2.87s	remaining: 515ms
848:	learn: 0.2381507	total: 2.88s	remaining: 511ms
849:	learn: 0.2379354	total: 2.88s	remaining: 508ms
850:	learn: 0.2376864	total: 2.88s	remaining: 505ms
851:	learn: 0.2373329	total: 2.88s	remaining: 501ms
852:	learn: 0.2372335	total: 2.89s	remaining: 498ms
853:	learn: 0.2367554	total: 2.89s	remaining: 494ms
854:	learn: 0.2365606	total: 2.89s	remaining: 491ms
855:	learn: 0.2365435	total: 2.9s	remaining: 488ms
856:	learn: 0.2362393	total: 2.9s	remaining: 484ms
857:	learn: 0.2361009	total: 2.9s	remaining: 481ms
858:	learn: 0.2360136	total: 2.91s	remaining: 477ms
859:	learn: 0.2357448	total: 2.91s	remaining: 474ms
860:	learn: 0.2354335	total: 2.91s	remaining: 470ms
861:	learn: 0.2353438	total: 2.92s	remaining: 467ms
862:	learn: 0.2353260	total: 2.92s	remaining: 463ms
863:	learn: 0.2348839	total: 2.92s	remaining: 460ms
864:	learn: 0.2347374	total: 2.93s	remaining: 457ms
865:	learn: 0.2345540	total: 2.93s	remaining: 453ms
866:	learn: 0.2343686	total: 2.93s	remaining: 450ms
867:	learn: 0.2341852	total: 2.94s	remaining: 446ms
868:	learn: 0.2339969	total: 2.94s	remaining: 443ms
869:	learn: 0.2339190	total: 2.94s	remaining: 440ms
870:	learn: 0.2337132	total: 2.94s	remaining: 436ms
871:	learn: 0.2333663	total: 2.95s	remaining: 433ms
872:	learn: 0.2331696	total: 2.95s	remaining: 429ms
873:	learn: 0.2329565	total: 2.96s	remaining: 426ms
874:	learn: 0.2326612	total: 2.96s	remaining: 423ms
875:	learn: 0.2323790	total: 2.96s	remaining: 419ms
876:	learn: 0.2322076	total: 2.96s	remaining: 416ms
877:	learn: 0.2319440	total: 2.97s	remaining: 413ms
878:	learn: 0.2317660	total: 2.97s	remaining: 409ms
879:	learn: 0.2317280	total: 2.98s	remaining: 406ms
880:	learn: 0.2317000	total: 2.98s	remaining: 402ms
881:	learn: 0.2315801	total: 2.98s	remaining: 399ms
882:	learn: 0.2315676	total: 2.98s	remaining: 395ms
883:	learn: 0.2314147	total: 2.99s	remaining: 392ms
884:	learn: 0.2312086	total: 2.99s	remaining: 389ms
885:	learn: 0.2308071	total: 2.99s	remaining: 385ms
886:	learn: 0.2304809	total: 3s	remaining: 382ms
887:	learn: 0.2302192	total: 3s	remaining: 378ms
888:	learn: 0.2299117	total: 3s	remaining: 375ms
889:	learn: 0.2297213	total: 3.01s	remaining: 372ms
890:	learn: 0.2296087	total: 3.01s	remaining: 368ms
891:	learn: 0.2294252	total: 3.01s	remaining: 365ms
892:	learn: 0.2292066	total: 3.02s	remaining: 361ms
893:	learn: 0.2288881	total: 3.02s	remaining: 358ms
894:	learn: 0.2286894	total: 3.02s	remaining: 355ms
895:	learn: 0.2284812	total: 3.03s	remaining: 351ms
896:	learn: 0.2281809	total: 3.03s	remaining: 348ms
897:	learn: 0.2279510	total: 3.03s	remaining: 345ms
898:	learn: 0.2279389	total: 3.04s	remaining: 341ms
899:	learn: 0.2277300	total: 3.04s	remaining: 338ms
900:	learn: 0.2274465	total: 3.04s	remaining: 334ms
901:	learn: 0.2274025	total: 3.04s	remaining: 331ms
902:	learn: 0.2272341	total: 3.05s	remaining: 327ms
903:	learn: 0.2272163	total: 3.05s	remaining: 324ms
904:	learn: 0.2270345	total: 3.06s	remaining: 321ms
905:	learn: 0.2270112	total: 3.06s	remaining: 317ms
906:	learn: 0.2267261	total: 3.06s	remaining: 314ms
907:	learn: 0.2265868	total: 3.06s	remaining: 310ms
908:	learn: 0.2262635	total: 3.07s	remaining: 307ms
909:	learn: 0.2261474	total: 3.07s	remaining: 304ms
910:	learn: 0.2261271	total: 3.07s	remaining: 300ms
911:	learn: 0.2259786	total: 3.08s	remaining: 297ms
912:	learn: 0.2257185	total: 3.08s	remaining: 293ms
913:	learn: 0.2255092	total: 3.08s	remaining: 290ms
914:	learn: 0.2253473	total: 3.08s	remaining: 287ms
915:	learn: 0.2252805	total: 3.09s	remaining: 283ms
916:	learn: 0.2252673	total: 3.09s	remaining: 280ms
917:	learn: 0.2250309	total: 3.09s	remaining: 276ms
918:	learn: 0.2250015	total: 3.1s	remaining: 273ms
919:	learn: 0.2249589	total: 3.1s	remaining: 270ms
920:	learn: 0.2248454	total: 3.1s	remaining: 266ms
921:	learn: 0.2246542	total: 3.11s	remaining: 263ms
922:	learn: 0.2243981	total: 3.11s	remaining: 259ms
923:	learn: 0.2242925	total: 3.11s	remaining: 256ms
924:	learn: 0.2240367	total: 3.12s	remaining: 253ms
925:	learn: 0.2236834	total: 3.12s	remaining: 249ms
926:	learn: 0.2234257	total: 3.12s	remaining: 246ms
927:	learn: 0.2230992	total: 3.13s	remaining: 243ms
928:	learn: 0.2230899	total: 3.13s	remaining: 239ms
929:	learn: 0.2228504	total: 3.13s	remaining: 236ms
930:	learn: 0.2227112	total: 3.14s	remaining: 232ms
931:	learn: 0.2224408	total: 3.14s	remaining: 229ms
932:	learn: 0.2222795	total: 3.14s	remaining: 226ms
933:	learn: 0.2220837	total: 3.15s	remaining: 222ms
934:	learn: 0.2218455	total: 3.15s	remaining: 219ms
935:	learn: 0.2216053	total: 3.15s	remaining: 216ms
936:	learn: 0.2212786	total: 3.16s	remaining: 212ms
937:	learn: 0.2210942	total: 3.16s	remaining: 209ms
938:	learn: 0.2207330	total: 3.16s	remaining: 205ms
939:	learn: 0.2204996	total: 3.17s	remaining: 202ms
940:	learn: 0.2201419	total: 3.17s	remaining: 199ms
941:	learn: 0.2197450	total: 3.17s	remaining: 195ms
942:	learn: 0.2195649	total: 3.17s	remaining: 192ms
943:	learn: 0.2194141	total: 3.18s	remaining: 189ms
944:	learn: 0.2192079	total: 3.18s	remaining: 185ms
945:	learn: 0.2189989	total: 3.19s	remaining: 182ms
946:	learn: 0.2187927	total: 3.19s	remaining: 178ms
947:	learn: 0.2186379	total: 3.19s	remaining: 175ms
948:	learn: 0.2184722	total: 3.19s	remaining: 172ms
949:	learn: 0.2183393	total: 3.2s	remaining: 168ms
950:	learn: 0.2181005	total: 3.2s	remaining: 165ms
951:	learn: 0.2179581	total: 3.2s	remaining: 162ms
952:	learn: 0.2177392	total: 3.21s	remaining: 158ms
953:	learn: 0.2174948	total: 3.21s	remaining: 155ms
954:	learn: 0.2173059	total: 3.21s	remaining: 151ms
955:	learn: 0.2171342	total: 3.22s	remaining: 148ms
956:	learn: 0.2169922	total: 3.22s	remaining: 145ms
957:	learn: 0.2167591	total: 3.22s	remaining: 141ms
958:	learn: 0.2164620	total: 3.23s	remaining: 138ms
959:	learn: 0.2161386	total: 3.23s	remaining: 135ms
960:	learn: 0.2161191	total: 3.23s	remaining: 131ms
961:	learn: 0.2161055	total: 3.24s	remaining: 128ms
962:	learn: 0.2158105	total: 3.24s	remaining: 125ms
963:	learn: 0.2154934	total: 3.24s	remaining: 121ms
964:	learn: 0.2153511	total: 3.25s	remaining: 118ms
965:	learn: 0.2150633	total: 3.25s	remaining: 114ms
966:	learn: 0.2147549	total: 3.25s	remaining: 111ms
967:	learn: 0.2144673	total: 3.26s	remaining: 108ms
968:	learn: 0.2141893	total: 3.26s	remaining: 104ms
969:	learn: 0.2140516	total: 3.26s	remaining: 101ms
970:	learn: 0.2140342	total: 3.27s	remaining: 97.6ms
971:	learn: 0.2139076	total: 3.27s	remaining: 94.2ms
972:	learn: 0.2135383	total: 3.27s	remaining: 90.8ms
973:	learn: 0.2132737	total: 3.28s	remaining: 87.5ms
974:	learn: 0.2130593	total: 3.28s	remaining: 84.1ms
975:	learn: 0.2127680	total: 3.28s	remaining: 80.7ms
976:	learn: 0.2125970	total: 3.29s	remaining: 77.4ms
977:	learn: 0.2123460	total: 3.29s	remaining: 74ms
978:	learn: 0.2121052	total: 3.29s	remaining: 70.6ms
979:	learn: 0.2118347	total: 3.29s	remaining: 67.3ms
980:	learn: 0.2114822	total: 3.3s	remaining: 63.9ms
981:	learn: 0.2112691	total: 3.3s	remaining: 60.5ms
982:	learn: 0.2110438	total: 3.31s	remaining: 57.2ms
983:	learn: 0.2108138	total: 3.31s	remaining: 53.8ms
984:	learn: 0.2106651	total: 3.31s	remaining: 50.4ms
985:	learn: 0.2104341	total: 3.31s	remaining: 47.1ms
986:	learn: 0.2104232	total: 3.32s	remaining: 43.7ms
987:	learn: 0.2103155	total: 3.32s	remaining: 40.3ms
988:	learn: 0.2100220	total: 3.33s	remaining: 37ms
989:	learn: 0.2098433	total: 3.33s	remaining: 33.6ms
990:	learn: 0.2094910	total: 3.33s	remaining: 30.3ms
991:	learn: 0.2094807	total: 3.33s	remaining: 26.9ms
992:	learn: 0.2093171	total: 3.34s	remaining: 23.5ms
993:	learn: 0.2090154	total: 3.35s	remaining: 20.2ms
994:	learn: 0.2088564	total: 3.35s	remaining: 16.9ms
995:	learn: 0.2085709	total: 3.36s	remaining: 13.5ms
996:	learn: 0.2083527	total: 3.36s	remaining: 10.1ms
997:	learn: 0.2080872	total: 3.37s	remaining: 6.75ms
998:	learn: 0.2080622	total: 3.37s	remaining: 3.37ms
999:	learn: 0.2077081	total: 3.37s	remaining: 0us
In [27]:
df_compare
Out[27]:
Model Mean Squared Error Mean Absolute Error R2 Score Training Time (s)
0 Ridge Regression 0.749019 0.601173 0.981691 0.004274
1 Linear Regression 0.767788 0.612101 0.981232 0.015218
2 Lasso Regression 1.101427 0.785124 0.973077 0.091215
3 Elastic Net 1.12101 0.749384 0.972598 0.099243
4 AdaBoost 88.4183 7.570231 -1.161293 0.679208
5 Gradient Boosting 88.682287 7.57811 -1.167746 2.148484
6 Decision Tree 91.197554 7.800773 -1.229229 0.069291
7 Random Forest 92.656816 7.815599 -1.264899 3.783108
11 CatBoost 94.177261 7.912422 -1.302065 3.518485
8 XGBoost 103.064918 8.379896 -1.519314 0.455947
9 K-Neighbors Regressor 330.959365 16.750109 -7.089956 0.000565
10 SVR 349.436131 17.668843 -7.541601 0.134831

3. Linear Regression Model¶

3.a Linear Regression Model with All Features¶

In [28]:
# Train the linear regression model
lr_model_base = LinearRegression()
lr_model_base.fit(X_train_scaled, y_train)

# Make predictions on the scaled test set
lr_pred_base = lr_model_base.predict(X_test_scaled)
In [29]:
prediction_df = pd.DataFrame()
prediction_df['date'] = df[df.date.dt.year>=2020]['date']
prediction_df['y_test'] = y_test
prediction_df['lr_pred_base'] = lr_pred_base

prediction_df.head()
Out[29]:
date y_test lr_pred_base
1729 2020-01-02 54.240002 54.234807
1730 2020-01-03 54.150002 54.462575
1731 2020-01-06 53.919998 54.392052
1732 2020-01-07 54.049999 53.866825
1733 2020-01-08 54.189999 54.141524
In [30]:
lr_score_base = evaluate_regression_model(y_test, lr_pred_base)
Mean Squared Error (MSE): 0.768
Root Mean Squared Error (RMSE): 0.876
Mean Absolute Error (MAE): 0.612
R-squared (R2): 0.981
In [31]:
lr_score_base
Out[31]:
{'MSE': 0.7677881743434356,
 'RMSE': 0.8762352277462003,
 'MAE': 0.612101458244357,
 'R2': 0.9812322205481261}
In [32]:
plot_regression_accuracy(y_test, lr_pred_base)
In [33]:
plot_predictions(df,lr_pred_base)
In [34]:
lr_base_feature_importance = plot_feature_importance(lr_model_base,X_train,20)
In [35]:
lr_base_feature_importance[:15]
Out[35]:
Feature Importance
0 adj_close_5d_avg 34.506178
1 sma_5 25.998560
2 ema_9 21.885541
3 adj_close_1d_ago 9.666451
4 adj_close_15d_avg 8.627124
5 close_5d_ago 8.502948
6 close_1d_ago 7.337071
7 adj_close_3d_avg 7.122290
8 low_5d_avg 5.716135
9 low_10d_avg 5.542512
10 open_5d_avg 5.011481
11 adj_close_5d_ago 4.463160
12 adj_close_7d_avg 4.352698
13 adj_close_1w_ago 4.039827
14 adj_close_3d_ago 3.904271

3.b. Linear Regression Model with top 20 Features¶

In [36]:
keep_cols20 = lr_base_feature_importance[:20]['Feature'].tolist()

X_train20 = X_train[keep_cols20]
X_test20 = X_test[keep_cols20]

scaler = StandardScaler()
X_train_scaled20 = scaler.fit_transform(X_train20)
X_test_scaled20 = scaler.transform(X_test20)
In [37]:
# Train the linear regression model
lr_model20 = LinearRegression()
lr_model20.fit(X_train_scaled20, y_train)

# Make predictions on the scaled test set
lr_pred20 = lr_model20.predict(X_test_scaled20)
lr_score20 = evaluate_regression_model(y_test, lr_pred20)
Mean Squared Error (MSE): 0.777
Root Mean Squared Error (RMSE): 0.882
Mean Absolute Error (MAE): 0.611
R-squared (R2): 0.981
In [38]:
prediction_df['lr_pred20'] = lr_pred20

prediction_df.head()
Out[38]:
date y_test lr_pred_base lr_pred20
1729 2020-01-02 54.240002 54.234807 54.271747
1730 2020-01-03 54.150002 54.462575 54.607093
1731 2020-01-06 53.919998 54.392052 54.587532
1732 2020-01-07 54.049999 53.866825 53.925943
1733 2020-01-08 54.189999 54.141524 53.925500
In [39]:
lr_score20
Out[39]:
{'MSE': 0.7771456769467653,
 'RMSE': 0.8815586633609617,
 'MAE': 0.6109586051263984,
 'R2': 0.9810034861771783}
In [40]:
plot_feature_importance(lr_model20,X_train20,20)
Out[40]:
Feature Importance
0 adj_close_5d_avg 38.854308
1 sma_5 27.484310
2 adj_close_1d_ago 23.775534
3 close_1d_ago 19.321375
4 close_5d_ago 15.098392
5 adj_close_5d_ago 11.704156
6 adj_close_15d_avg 4.158538
7 adj_close_1w_ago 3.070985
8 sma_15 2.762572
9 close_1w_ago 2.757137
10 low_5d_avg 2.447157
11 open_5d_avg 1.692342
12 low_10d_avg 1.600124
13 high_5d_avg 1.304435
14 open_10d_avg 1.301215
15 adj_close_3d_ago 1.169397
16 ema_9 0.882495
17 adj_close_30d_avg 0.311786
18 adj_close_7d_avg 0.285091
19 adj_close_3d_avg 0.276478

3.c Linear Regression Model with top 15 Features¶

In [41]:
keep_cols15 = lr_base_feature_importance[:15]['Feature'].tolist()

X_train15 = X_train[keep_cols15]
X_test15 = X_test[keep_cols15]

scaler = StandardScaler()
X_train_scaled15 = scaler.fit_transform(X_train15)
X_test_scaled15 = scaler.transform(X_test15)
In [42]:
# Train the linear regression model
lr_model15 = LinearRegression()
lr_model15.fit(X_train_scaled15, y_train)

# Make predictions on the scaled test set
lr_pred15 = lr_model15.predict(X_test_scaled15)
lr_score15 = evaluate_regression_model(y_test, lr_pred15)
Mean Squared Error (MSE): 0.764
Root Mean Squared Error (RMSE): 0.874
Mean Absolute Error (MAE): 0.608
R-squared (R2): 0.981
In [43]:
prediction_df['lr_pred15'] = lr_pred15

prediction_df.head()
Out[43]:
date y_test lr_pred_base lr_pred20 lr_pred15
1729 2020-01-02 54.240002 54.234807 54.271747 54.203303
1730 2020-01-03 54.150002 54.462575 54.607093 54.564688
1731 2020-01-06 53.919998 54.392052 54.587532 54.570543
1732 2020-01-07 54.049999 53.866825 53.925943 53.969683
1733 2020-01-08 54.189999 54.141524 53.925500 53.970832
In [44]:
lr_score15
Out[44]:
{'MSE': 0.76431207310404,
 'RMSE': 0.874249434145679,
 'MAE': 0.6082563126517426,
 'R2': 0.9813171902098028}
In [45]:
plot_feature_importance(lr_model15,X_train15,15)
Out[45]:
Feature Importance
0 adj_close_5d_avg 36.266693
1 sma_5 26.803574
2 adj_close_1d_ago 22.806743
3 close_1d_ago 18.534424
4 close_5d_ago 14.111041
5 adj_close_5d_ago 10.688623
6 adj_close_3d_ago 1.161695
7 low_5d_avg 0.833075
8 ema_9 0.494789
9 adj_close_15d_avg 0.421462
10 adj_close_7d_avg 0.379717
11 low_10d_avg 0.299484
12 adj_close_1w_ago 0.296130
13 adj_close_3d_avg 0.259609
14 open_5d_avg 0.252947

3.d. Linear Regression Model with top 10 Features¶

In [46]:
keep_cols10 = lr_base_feature_importance[:10]['Feature'].tolist()

X_train10 = X_train[keep_cols10]
X_test10 = X_test[keep_cols10]

scaler = StandardScaler()
X_train_scaled10 = scaler.fit_transform(X_train10)
X_test_scaled10 = scaler.transform(X_test10)
In [47]:
# Train the linear regression model
lr_model10 = LinearRegression()
lr_model10.fit(X_train_scaled10, y_train)

# Make predictions on the scaled test set
lr_pred10 = lr_model10.predict(X_test_scaled10)
lr_score10 = evaluate_regression_model(y_test, lr_pred10)
Mean Squared Error (MSE): 0.843
Root Mean Squared Error (RMSE): 0.918
Mean Absolute Error (MAE): 0.634
R-squared (R2): 0.979
In [48]:
prediction_df['lr_pred10'] = lr_pred10

prediction_df.head()
Out[48]:
date y_test lr_pred_base lr_pred20 lr_pred15 lr_pred10
1729 2020-01-02 54.240002 54.234807 54.271747 54.203303 53.928125
1730 2020-01-03 54.150002 54.462575 54.607093 54.564688 54.251539
1731 2020-01-06 53.919998 54.392052 54.587532 54.570543 54.299935
1732 2020-01-07 54.049999 53.866825 53.925943 53.969683 54.039427
1733 2020-01-08 54.189999 54.141524 53.925500 53.970832 54.149424
In [49]:
lr_score10
Out[49]:
{'MSE': 0.8431161224362277,
 'RMSE': 0.91821354947323,
 'MAE': 0.6337464901517136,
 'R2': 0.9793909075875864}
In [50]:
plot_feature_importance(lr_model10,X_train10,10)
Out[50]:
Feature Importance
0 adj_close_1d_ago 31.773423
1 adj_close_5d_avg 30.972946
2 close_1d_ago 25.880350
3 sma_5 25.688939
4 close_5d_ago 5.203279
5 adj_close_3d_avg 2.188661
6 adj_close_15d_avg 1.072305
7 ema_9 0.829016
8 low_5d_avg 0.412671
9 low_10d_avg 0.181438

4. Ridge Regression Parameter Fine Tuning¶

4.a. Ridge Regression with All Features¶

In [51]:
ridge_model = Ridge()

    # Define the hyperparameter grid to search
param_grid = {'alpha': [0.001, 0.01, 0.1, 1, 10, 100]}

    # Perform GridSearchCV for hyperparameter tuning
grid_search = GridSearchCV(estimator=ridge_model, param_grid=param_grid, scoring='neg_mean_squared_error', cv=5)
grid_search.fit(X_train_scaled, y_train)

    # Get the best model
best_ridge_model = grid_search.best_estimator_

    # Make predictions on the test set
ridge_pred_base = best_ridge_model.predict(X_test_scaled)

    # Evaluate the best model
mse = mean_squared_error(y_test, ridge_pred_base)
rmse = mean_squared_error(y_test, ridge_pred_base, squared=False)
mae = mean_absolute_error(y_test, ridge_pred_base)
r2 = r2_score(y_test, ridge_pred_base)

print("Best Ridge Regression Model:")
print(f"Best alpha: {best_ridge_model.alpha}")
print(f'Root Mean Squared Error (RMSE): {np.round(rmse,3)}')
print(f"Mean Squared Error: {np.round(mse,3)}")
print(f"Mean Absolute Error: {np.round(mae,3)}")
print(f"R2 Score: {np.round(r2,3)}")

ridge_score = {
        'MSE': mse,
        'RMSE': rmse,
        'MAE': mae,
        'R2': r2
    }
Best Ridge Regression Model:
Best alpha: 0.001
Root Mean Squared Error (RMSE): 0.872
Mean Squared Error: 0.76
Mean Absolute Error: 0.609
R2 Score: 0.981
In [52]:
ridge_base_feature_importance = plot_feature_importance(best_ridge_model,X_train,20)
In [53]:
ridge_base_feature_importance[:20]
Out[53]:
Feature Importance
0 adj_close_5d_avg 29.209670
1 sma_5 23.327076
2 ema_9 20.052079
3 adj_close_1d_ago 8.608975
4 close_5d_ago 7.815929
5 adj_close_15d_avg 6.758087
6 close_1d_ago 6.383786
7 adj_close_3d_avg 6.095190
8 low_10d_avg 5.185958
9 high_5d_avg 4.577397
10 low_5d_avg 4.530334
11 adj_close_5d_ago 4.444782
12 open_5d_avg 4.252683
13 sma_10 4.082776
14 adj_close_30d_avg 3.729390
15 macd 3.581098
16 sma_15 3.507948
17 high_7d_avg 3.355806
18 open_10d_avg 3.126070
19 high_15d_avg 3.066558
In [54]:
prediction_df['ridge_pred_base'] = ridge_pred_base

prediction_df.head()
Out[54]:
date y_test lr_pred_base lr_pred20 lr_pred15 lr_pred10 ridge_pred_base
1729 2020-01-02 54.240002 54.234807 54.271747 54.203303 53.928125 54.233949
1730 2020-01-03 54.150002 54.462575 54.607093 54.564688 54.251539 54.459871
1731 2020-01-06 53.919998 54.392052 54.587532 54.570543 54.299935 54.363214
1732 2020-01-07 54.049999 53.866825 53.925943 53.969683 54.039427 53.879662
1733 2020-01-08 54.189999 54.141524 53.925500 53.970832 54.149424 54.173588

4.b. Ridge Regression with top 20 Features¶

In [55]:
keep_cols20 = ridge_base_feature_importance[:20]['Feature'].tolist()

X_train20 = X_train[keep_cols20]
X_test20 = X_test[keep_cols20]

scaler = StandardScaler()
X_train_scaled20 = scaler.fit_transform(X_train20)
X_test_scaled20 = scaler.transform(X_test20)

# Train model
ridge_model20 = Ridge(alpha=0.001)
ridge_model20.fit(X_train_scaled20, y_train)


# Make predictions on the scaled test set
ridge_pred20 = ridge_model20.predict(X_test_scaled20)
ridge_score20 = evaluate_regression_model(y_test, ridge_pred20)
Mean Squared Error (MSE): 0.771
Root Mean Squared Error (RMSE): 0.878
Mean Absolute Error (MAE): 0.609
R-squared (R2): 0.981
In [56]:
plot_feature_importance(ridge_model20,X_train20,20)
Out[56]:
Feature Importance
0 adj_close_5d_avg 34.865155
1 sma_5 26.561371
2 adj_close_1d_ago 23.306249
3 close_1d_ago 19.054534
4 close_5d_ago 13.719602
5 adj_close_5d_ago 9.989549
6 adj_close_15d_avg 2.405561
7 low_5d_avg 2.326657
8 sma_15 1.820856
9 low_10d_avg 1.635199
10 open_5d_avg 1.424137
11 adj_close_3d_avg 1.268768
12 sma_10 1.218959
13 high_5d_avg 1.016692
14 open_10d_avg 0.924125
15 ema_9 0.564495
16 high_7d_avg 0.189963
17 adj_close_30d_avg 0.186579
18 high_15d_avg 0.086638
19 macd 0.004499
In [57]:
prediction_df['ridge_pred20'] = ridge_pred20

prediction_df.head()
Out[57]:
date y_test lr_pred_base lr_pred20 lr_pred15 lr_pred10 ridge_pred_base ridge_pred20
1729 2020-01-02 54.240002 54.234807 54.271747 54.203303 53.928125 54.233949 54.297031
1730 2020-01-03 54.150002 54.462575 54.607093 54.564688 54.251539 54.459871 54.574923
1731 2020-01-06 53.919998 54.392052 54.587532 54.570543 54.299935 54.363214 54.606500
1732 2020-01-07 54.049999 53.866825 53.925943 53.969683 54.039427 53.879662 53.954122
1733 2020-01-08 54.189999 54.141524 53.925500 53.970832 54.149424 54.173588 54.033765

5. Lasso Regression Model Parameter Fine Tuning¶

5.a Lasso Regression with All Features¶

In [58]:
lasso_model = Lasso()

param_grid = {'alpha': [0.001, 0.01, 0.1, 1, 10, 100]}

    # Perform GridSearchCV for hyperparameter tuning
grid_search = GridSearchCV(estimator=lasso_model, param_grid=param_grid, scoring='neg_mean_squared_error', cv=5)
grid_search.fit(X_train_scaled, y_train)

    # Get the best model
best_lasso_model = grid_search.best_estimator_

    # Make predictions on the test set
lasso_pred_base = best_lasso_model.predict(X_test_scaled)

    # Evaluate the best model
mse = mean_squared_error(y_test, lasso_pred_base)
rmse = mean_squared_error(y_test, lasso_pred_base, squared=False)
mae = mean_absolute_error(y_test, lasso_pred_base)
r2 = r2_score(y_test, lasso_pred_base)

print("Best Lasso Regression Model:")
print(f"Best alpha: {best_lasso_model.alpha}")
print(f'Root Mean Squared Error (RMSE): {np.round(rmse,3)}')
print(f"Mean Squared Error: {np.round(mse,3)}")
print(f"Mean Absolute Error: {np.round(mae,3)}")
print(f"R2 Score: {np.round(r2,3)}")

lasso_score = {
        'MSE': mse,
        'RMSE': rmse,
        'MAE': mae,
        'R2': r2
    }
Best Lasso Regression Model:
Best alpha: 0.001
Root Mean Squared Error (RMSE): 0.967
Mean Squared Error: 0.935
Mean Absolute Error: 0.662
R2 Score: 0.977
In [59]:
lasso_base_feature_importance = plot_feature_importance(best_lasso_model,X_train,20)
In [60]:
lasso_base_feature_importance[:20]
Out[60]:
Feature Importance
0 ema_9 4.871884
1 macd 1.751569
2 macd_signal 1.345834
3 close_3d_ago 0.958941
4 open_15d_avg 0.639682
5 rsi 0.562192
6 sma_15 0.495187
7 low_1d_ago 0.416806
8 open_2w_ago 0.305712
9 adj_close_3d_avg 0.290374
10 sma_30 0.269198
11 high_2w_ago 0.211690
12 low_3w_ago 0.159687
13 open_30d_avg 0.154355
14 volume_5d_avg 0.096779
15 close_2w_ago 0.090966
16 high_30d_avg 0.082343
17 low_30d_avg 0.076811
18 open_3w_ago 0.065410
19 open_5d_ago 0.064327
In [61]:
prediction_df['lasso_pred_base'] = lasso_pred_base

prediction_df.head()
Out[61]:
date y_test lr_pred_base lr_pred20 lr_pred15 lr_pred10 ridge_pred_base ridge_pred20 lasso_pred_base
1729 2020-01-02 54.240002 54.234807 54.271747 54.203303 53.928125 54.233949 54.297031 54.481490
1730 2020-01-03 54.150002 54.462575 54.607093 54.564688 54.251539 54.459871 54.574923 54.207026
1731 2020-01-06 53.919998 54.392052 54.587532 54.570543 54.299935 54.363214 54.606500 53.919954
1732 2020-01-07 54.049999 53.866825 53.925943 53.969683 54.039427 53.879662 53.954122 53.833212
1733 2020-01-08 54.189999 54.141524 53.925500 53.970832 54.149424 54.173588 54.033765 53.915187

5.b Lasso Regression with Top 20 Features¶

In [62]:
keep_cols20 = lasso_base_feature_importance[:20]['Feature'].tolist()

X_train20 = X_train[keep_cols20]
X_test20 = X_test[keep_cols20]

scaler = StandardScaler()
X_train_scaled20 = scaler.fit_transform(X_train20)
X_test_scaled20 = scaler.transform(X_test20)

# Train model
lasso_model20 = Lasso(alpha=0.001)
lasso_model20.fit(X_train_scaled20, y_train)


# Make predictions on the scaled test set
lasso_pred20 = lasso_model20.predict(X_test_scaled20)
lasso_score20 = evaluate_regression_model(y_test, lasso_pred20)
Mean Squared Error (MSE): 0.95
Root Mean Squared Error (RMSE): 0.975
Mean Absolute Error (MAE): 0.667
R-squared (R2): 0.977
In [63]:
plot_feature_importance(lasso_model20,X_train20,20)
Out[63]:
Feature Importance
0 ema_9 4.359165
1 macd 1.683244
2 open_15d_avg 1.543022
3 macd_signal 1.304271
4 close_3d_ago 0.919101
5 open_2w_ago 0.588218
6 rsi 0.538464
7 sma_30 0.471702
8 low_1d_ago 0.460356
9 adj_close_3d_avg 0.362687
10 low_3w_ago 0.172657
11 high_2w_ago 0.107719
12 volume_5d_avg 0.023766
13 close_2w_ago 0.012261
14 open_3w_ago 0.008425
15 sma_15 0.000000
16 open_30d_avg 0.000000
17 high_30d_avg 0.000000
18 low_30d_avg 0.000000
19 open_5d_ago 0.000000
In [64]:
prediction_df['lasso_pred20'] = lasso_pred20

prediction_df.head()
Out[64]:
date y_test lr_pred_base lr_pred20 lr_pred15 lr_pred10 ridge_pred_base ridge_pred20 lasso_pred_base lasso_pred20
1729 2020-01-02 54.240002 54.234807 54.271747 54.203303 53.928125 54.233949 54.297031 54.481490 54.540274
1730 2020-01-03 54.150002 54.462575 54.607093 54.564688 54.251539 54.459871 54.574923 54.207026 54.313239
1731 2020-01-06 53.919998 54.392052 54.587532 54.570543 54.299935 54.363214 54.606500 53.919954 53.991448
1732 2020-01-07 54.049999 53.866825 53.925943 53.969683 54.039427 53.879662 53.954122 53.833212 53.885031
1733 2020-01-08 54.189999 54.141524 53.925500 53.970832 54.149424 54.173588 54.033765 53.915187 53.943523

6. Elastic Net Regression Model Parameter Fine Tuning¶

6.a. Elastic Net with All Features¶

In [65]:
elastic_net_model = ElasticNet()

    # Define the hyperparameter grid to search
param_grid = {
        'alpha': [0.001, 0.01, 0.1, 1, 10, 100],
        'l1_ratio': [0.1, 0.3, 0.5, 0.7, 0.9]
    }

    # Perform GridSearchCV for hyperparameter tuning
grid_search = GridSearchCV(estimator=elastic_net_model, param_grid=param_grid, scoring='neg_mean_squared_error', cv=5)
grid_search.fit(X_train_scaled, y_train)

    # Get the best model
best_elastic_net_model = grid_search.best_estimator_

    # Make predictions on the test set
elastic_pred_base = best_elastic_net_model.predict(X_test_scaled)

    # Evaluate the best model
mse = mean_squared_error(y_test, elastic_pred_base)
rmse = mean_squared_error(y_test, elastic_pred_base, squared=False)
mae = mean_absolute_error(y_test, elastic_pred_base)
r2 = r2_score(y_test, elastic_pred_base)

print("Best Elastic Net Model:")
print(f"Best alpha: {best_elastic_net_model.alpha}")
print(f"Best l1_ratio: {best_elastic_net_model.l1_ratio}")
print(f'Root Mean Squared Error (RMSE): {np.round(rmse,3)}')
print(f"Mean Squared Error: {np.round(mse,3)}")
print(f"Mean Absolute Error: {np.round(mae,3)}")
print(f"R2 Score: {np.round(r2,3)}")

elastic_score = {
        'MSE': mse,
        'RMSE': rmse,
        'MAE': mae,
        'R2': r2
    }
Best Elastic Net Model:
Best alpha: 0.001
Best l1_ratio: 0.9
Root Mean Squared Error (RMSE): 0.964
Mean Squared Error: 0.929
Mean Absolute Error: 0.66
R2 Score: 0.977
In [66]:
elastic_base_feature_importance = plot_feature_importance(best_elastic_net_model,X_train,20)
In [67]:
elastic_base_feature_importance[:20]
Out[67]:
Feature Importance
0 ema_9 4.372681
1 macd 1.808266
2 macd_signal 1.391821
3 close_3d_ago 0.961167
4 sma_15 0.856338
5 open_15d_avg 0.654922
6 rsi 0.556937
7 low_1d_ago 0.425022
8 sma_30 0.335015
9 open_2w_ago 0.294117
10 adj_close_3d_avg 0.290065
11 high_2w_ago 0.224628
12 open_30d_avg 0.213522
13 low_3w_ago 0.172645
14 volume_5d_avg 0.103550
15 open_5d_ago 0.087747
16 high_30d_avg 0.086648
17 low_30d_avg 0.083314
18 open_3w_ago 0.076077
19 close_2w_ago 0.072879
In [68]:
prediction_df['elastic_pred_base'] = elastic_pred_base

prediction_df.head()
Out[68]:
date y_test lr_pred_base lr_pred20 lr_pred15 lr_pred10 ridge_pred_base ridge_pred20 lasso_pred_base lasso_pred20 elastic_pred_base
1729 2020-01-02 54.240002 54.234807 54.271747 54.203303 53.928125 54.233949 54.297031 54.481490 54.540274 54.475621
1730 2020-01-03 54.150002 54.462575 54.607093 54.564688 54.251539 54.459871 54.574923 54.207026 54.313239 54.203798
1731 2020-01-06 53.919998 54.392052 54.587532 54.570543 54.299935 54.363214 54.606500 53.919954 53.991448 53.919188
1732 2020-01-07 54.049999 53.866825 53.925943 53.969683 54.039427 53.879662 53.954122 53.833212 53.885031 53.834623
1733 2020-01-08 54.189999 54.141524 53.925500 53.970832 54.149424 54.173588 54.033765 53.915187 53.943523 53.922324

6.b. Elastic Net with Top 20 Features¶

In [69]:
keep_cols20 = elastic_base_feature_importance[:20]['Feature'].tolist()

X_train20 = X_train[keep_cols20]
X_test20 = X_test[keep_cols20]

scaler = StandardScaler()
X_train_scaled20 = scaler.fit_transform(X_train20)
X_test_scaled20 = scaler.transform(X_test20)

# Train model
elastic_model20 = ElasticNet(alpha=0.001,l1_ratio =  0.9)
elastic_model20.fit(X_train_scaled20, y_train)


# Make predictions on the scaled test set
elastic_pred20 = elastic_model20.predict(X_test_scaled20)
elastic_score20 = evaluate_regression_model(y_test, elastic_pred20)
Mean Squared Error (MSE): 0.96
Root Mean Squared Error (RMSE): 0.98
Mean Absolute Error (MAE): 0.667
R-squared (R2): 0.977
In [70]:
plot_feature_importance(elastic_model20,X_train20,20)
Out[70]:
Feature Importance
0 ema_9 3.674293
1 macd 1.750184
2 sma_15 1.705084
3 macd_signal 1.361563
4 close_3d_ago 0.845265
5 sma_30 0.726128
6 rsi 0.539805
7 low_1d_ago 0.501059
8 open_2w_ago 0.363514
9 open_15d_avg 0.326419
10 adj_close_3d_avg 0.309635
11 high_2w_ago 0.291731
12 low_3w_ago 0.195555
13 volume_5d_avg 0.026565
14 low_30d_avg 0.000549
15 open_30d_avg 0.000468
16 open_5d_ago 0.000000
17 high_30d_avg 0.000000
18 open_3w_ago 0.000000
19 close_2w_ago 0.000000
In [71]:
prediction_df['elastic_pred20'] = elastic_pred20

prediction_df.head()
Out[71]:
date y_test lr_pred_base lr_pred20 lr_pred15 lr_pred10 ridge_pred_base ridge_pred20 lasso_pred_base lasso_pred20 elastic_pred_base elastic_pred20
1729 2020-01-02 54.240002 54.234807 54.271747 54.203303 53.928125 54.233949 54.297031 54.481490 54.540274 54.475621 54.512223
1730 2020-01-03 54.150002 54.462575 54.607093 54.564688 54.251539 54.459871 54.574923 54.207026 54.313239 54.203798 54.293587
1731 2020-01-06 53.919998 54.392052 54.587532 54.570543 54.299935 54.363214 54.606500 53.919954 53.991448 53.919188 53.995023
1732 2020-01-07 54.049999 53.866825 53.925943 53.969683 54.039427 53.879662 53.954122 53.833212 53.885031 53.834623 53.869575
1733 2020-01-08 54.189999 54.141524 53.925500 53.970832 54.149424 54.173588 54.033765 53.915187 53.943523 53.922324 53.944519

7. Model Comparison¶

In [72]:
ela_df = pd.DataFrame([elastic_score.keys(),elastic_score.values()])
ela_df.columns = ela_df.iloc[0]
ela_df = ela_df[1:].reset_index(drop=True)
ela_df['Model'] = 'Elastic_Net with All Features'

ela_20_df = pd.DataFrame([elastic_score20.keys(),elastic_score20.values()])
ela_20_df.columns = ela_20_df.iloc[0]
ela_20_df = ela_20_df[1:].reset_index(drop=True)
ela_20_df['Model'] = 'Elastic_Net with Top 20 Features'

lasso_df = pd.DataFrame([lasso_score.keys(),lasso_score.values()])
lasso_df.columns = lasso_df.iloc[0]
lasso_df = lasso_df[1:].reset_index(drop=True)
lasso_df['Model'] = 'Lasso with All Features'

lasso_20_df = pd.DataFrame([lasso_score20.keys(),lasso_score20.values()])
lasso_20_df.columns = lasso_20_df.iloc[0]
lasso_20_df = lasso_20_df[1:].reset_index(drop=True)
lasso_20_df['Model'] = 'Lasso with Top 20 Features'

ridge_df = pd.DataFrame([ridge_score.keys(),ridge_score.values()])
ridge_df.columns = ridge_df.iloc[0]
ridge_df = ridge_df[1:].reset_index(drop=True)
ridge_df['Model'] = 'Ridge with All Features'

ridge_20_df = pd.DataFrame([ridge_score20.keys(),ridge_score20.values()])
ridge_20_df.columns = ridge_20_df.iloc[0]
ridge_20_df = ridge_20_df[1:].reset_index(drop=True)
ridge_20_df['Model'] = 'Ridge with Top 20 Features'

lr_base_df = pd.DataFrame([lr_score_base.keys(),lr_score_base.values()])
lr_base_df.columns = lr_base_df.iloc[0]
lr_base_df = lr_base_df[1:].reset_index(drop=True)
lr_base_df['Model'] = 'Linear Reg. with All Features'

lr_20_df = pd.DataFrame([lr_score20.keys(),lr_score20.values()])
lr_20_df.columns = lr_20_df.iloc[0]
lr_20_df = lr_20_df[1:].reset_index(drop=True)
lr_20_df['Model'] = 'Linear Reg. with Top 20 Features'

lr_15_df = pd.DataFrame([lr_score15.keys(),lr_score15.values()])
lr_15_df.columns = lr_15_df.iloc[0]
lr_15_df = lr_15_df[1:].reset_index(drop=True)
lr_15_df['Model'] = 'Linear Reg. with Top 15 Features'

lr_10_df = pd.DataFrame([lr_score10.keys(),lr_score10.values()])
lr_10_df.columns = lr_10_df.iloc[0]
lr_10_df = lr_10_df[1:].reset_index(drop=True)
lr_10_df['Model'] = 'Linear Reg. with Top 10 Features'

df_compare = pd.concat([ela_df,lasso_df,ridge_df,ela_20_df,lasso_20_df,ridge_20_df,
                        lr_base_df,lr_20_df,lr_15_df,lr_10_df]).sort_values(by=['R2'],ascending=False).reset_index(drop=True)

df_compare
Out[72]:
MSE RMSE MAE R2 Model
0 0.76015 0.871866 0.60886 0.981419 Ridge with All Features
1 0.764312 0.874249 0.608256 0.981317 Linear Reg. with Top 15 Features
2 0.767788 0.876235 0.612101 0.981232 Linear Reg. with All Features
3 0.771305 0.87824 0.60942 0.981146 Ridge with Top 20 Features
4 0.777146 0.881559 0.610959 0.981003 Linear Reg. with Top 20 Features
5 0.843116 0.918214 0.633746 0.979391 Linear Reg. with Top 10 Features
6 0.928855 0.963771 0.659537 0.977295 Elastic_Net with All Features
7 0.935026 0.966967 0.661521 0.977144 Lasso with All Features
8 0.949732 0.974542 0.666845 0.976785 Lasso with Top 20 Features
9 0.959534 0.979558 0.667449 0.976545 Elastic_Net with Top 20 Features
In [73]:
prediction_df
Out[73]:
date y_test lr_pred_base lr_pred20 lr_pred15 lr_pred10 ridge_pred_base ridge_pred20 lasso_pred_base lasso_pred20 elastic_pred_base elastic_pred20
1729 2020-01-02 54.240002 54.234807 54.271747 54.203303 53.928125 54.233949 54.297031 54.481490 54.540274 54.475621 54.512223
1730 2020-01-03 54.150002 54.462575 54.607093 54.564688 54.251539 54.459871 54.574923 54.207026 54.313239 54.203798 54.293587
1731 2020-01-06 53.919998 54.392052 54.587532 54.570543 54.299935 54.363214 54.606500 53.919954 53.991448 53.919188 53.995023
1732 2020-01-07 54.049999 53.866825 53.925943 53.969683 54.039427 53.879662 53.954122 53.833212 53.885031 53.834623 53.869575
1733 2020-01-08 54.189999 54.141524 53.925500 53.970832 54.149424 54.173588 54.033765 53.915187 53.943523 53.922324 53.944519
... ... ... ... ... ... ... ... ... ... ... ... ...
2694 2023-11-01 67.970001 67.284685 66.932767 67.028746 67.004810 67.325547 66.883598 66.893549 66.983023 66.898251 66.962332
2695 2023-11-02 68.820000 67.961443 67.903284 67.963962 67.941089 67.983409 67.964641 67.421988 67.462660 67.435336 67.464215
2696 2023-11-03 68.239998 68.862446 69.001802 69.034374 68.480353 68.837447 68.941355 68.232541 68.282736 68.249280 68.273542
2697 2023-11-06 68.489998 68.073511 68.184944 68.248638 67.791667 68.100132 68.219073 68.189779 68.252203 68.190293 68.232252
2698 2023-11-07 69.019997 68.266807 68.945917 69.051137 68.728268 68.332505 69.018382 68.466769 68.568573 68.473424 68.545360

970 rows × 12 columns

In [74]:
plt.figure(figsize=(20, 10))
sns.lineplot(x = prediction_df.date, y=prediction_df.y_test,label='y_test')

sns.lineplot(x = prediction_df.date, y=prediction_df.lr_pred_base,label='lr_pred_base')
sns.lineplot(x = prediction_df.date, y=prediction_df.lr_pred20,label='lr_pred20')
sns.lineplot(x = prediction_df.date, y=prediction_df.lr_pred15,label='lr_pred15')
sns.lineplot(x = prediction_df.date, y=prediction_df.lr_pred10,label='lr_pred10')

sns.lineplot(x = prediction_df.date, y=prediction_df.ridge_pred_base,label='ridge_pred_base')
sns.lineplot(x = prediction_df.date, y=prediction_df.ridge_pred20,label='ridge_pred20')

sns.lineplot(x = prediction_df.date, y=prediction_df.lasso_pred_base,label='lasso_pred_base')
sns.lineplot(x = prediction_df.date, y=prediction_df.lasso_pred20,label='lasso_pred20')

sns.lineplot(x = prediction_df.date, y=prediction_df.elastic_pred_base,label='elastic_pred_base')
sns.lineplot(x = prediction_df.date, y=prediction_df.elastic_pred20,label='elastic_pred20')

plt.legend(prop={'size': 14, 'weight': 'bold'})
plt.title('Model Prediction Comparison', fontsize=16)
plt.ylabel('Prediction', fontsize=14)
plt.xlabel('Date', fontsize=14)

plt.show()
In [75]:
plt.figure(figsize=(20, 10))
sns.lineplot(x = prediction_df.date, y=prediction_df.y_test,label='y_test')

sns.lineplot(x = prediction_df.date, y=prediction_df.lr_pred_base,label='lr_pred_base')
sns.lineplot(x = prediction_df.date, y=prediction_df.lr_pred20,label='lr_pred20')
sns.lineplot(x = prediction_df.date, y=prediction_df.lr_pred15,label='lr_pred15')
sns.lineplot(x = prediction_df.date, y=prediction_df.lr_pred10,label='lr_pred10')

plt.legend(prop={'size': 14, 'weight': 'bold'})
plt.title('Model Prediction Comparison', fontsize=16)
plt.ylabel('Prediction', fontsize=14)
plt.xlabel('Date', fontsize=14)

plt.show()
In [76]:
plt.figure(figsize=(20, 10))
sns.lineplot(x = prediction_df.date, y=prediction_df.y_test,label='y_test')

sns.lineplot(x = prediction_df.date, y=prediction_df.ridge_pred_base,label='ridge_pred_base')
sns.lineplot(x = prediction_df.date, y=prediction_df.ridge_pred20,label='ridge_pred20')

plt.legend(prop={'size': 14, 'weight': 'bold'})
plt.title('Model Prediction Comparison', fontsize=16)
plt.ylabel('Prediction', fontsize=14)
plt.xlabel('Date', fontsize=14)

plt.show()
In [77]:
plt.figure(figsize=(20, 10))
sns.lineplot(x = prediction_df.date, y=prediction_df.y_test,label='y_test')

sns.lineplot(x = prediction_df.date, y=prediction_df.lasso_pred_base,label='lasso_pred_base')
sns.lineplot(x = prediction_df.date, y=prediction_df.lasso_pred20,label='lasso_pred20')

plt.legend(prop={'size': 14, 'weight': 'bold'})
plt.title('Model Prediction Comparison', fontsize=16)
plt.ylabel('Prediction', fontsize=14)
plt.xlabel('Date', fontsize=14)

plt.show()
In [78]:
plt.figure(figsize=(20, 10))
sns.lineplot(x = prediction_df.date, y=prediction_df.y_test,label='y_test')

sns.lineplot(x = prediction_df.date, y=prediction_df.elastic_pred_base,label='elastic_pred_base')
sns.lineplot(x = prediction_df.date, y=prediction_df.elastic_pred20,label='elastic_pred20')

plt.legend(prop={'size': 14, 'weight': 'bold'})
plt.title('Model Prediction Comparison', fontsize=16)
plt.ylabel('Prediction', fontsize=14)
plt.xlabel('Date', fontsize=14)

plt.show()

7.a Final Best Model¶

In [79]:
# target column is next day's close price
y_train = train_df['close_1d_next'].copy()
X_train = train_df.drop(['close_1d_next'], 1)

# target column is next day's close price
y_test  = test_df['close_1d_next'].copy()
X_test  = test_df.drop(['close_1d_next'], 1)
In [80]:
def train_ridge_regression(X_train,X_test,y_train,y_test):
    
    ridge_model = Ridge(alpha= 0.001)
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Train model
    ridge_model = Ridge(alpha=0.001)
    ridge_model.fit(X_train_scaled, y_train)


    # Make predictions on the scaled test set
    ridge_pred = ridge_model.predict(X_test_scaled)
    ridge_score = evaluate_regression_model2(y_test, ridge_pred)

    return ridge_model,ridge_pred,ridge_score
In [81]:
ridge_model, ridge_pred, ridge_score = train_ridge_regression(X_train,X_test,y_train,y_test)
In [82]:
ridge_score
Out[82]:
{'MSE': 0.7601496984667533,
 'RMSE': 0.8718656424396785,
 'MAE': 0.6088603450778082,
 'R2': 0.981418935107418}
In [83]:
ridge_pred[:15]
Out[83]:
array([54.23394893, 54.45987053, 54.36321399, 53.87966228, 54.1735875 ,
       54.09581315, 54.16195972, 54.70517195, 54.39406442, 54.76593117,
       55.13141383, 55.07110404, 55.40303391, 55.39633407, 55.50978756])
In [84]:
plot_regression_accuracy(y_test, ridge_pred)
In [85]:
plot_predictions(df,ridge_pred)

8. Train Multiple Different Stocks with Ridge Regression Model¶

In [86]:
def preprocess_data(df):
    df['ema_9'] = df['close'].ewm(9).mean().shift()
    df['sma_5'] = df['close'].rolling(5).mean().shift()
    df['sma_10'] = df['close'].rolling(10).mean().shift()
    df['sma_15'] = df['close'].rolling(15).mean().shift()
    df['sma_30'] = df['close'].rolling(30).mean().shift()
    df['rsi'] = rsi(df) #.fillna(0)
    df['mfi'] = mfi(df, 14)
    EMA_12 = pd.Series(df['close'].ewm(span=12, min_periods=12).mean())
    EMA_26 = pd.Series(df['close'].ewm(span=26, min_periods=26).mean())
    df['macd'] = pd.Series(EMA_12 - EMA_26)
    df['macd_signal'] = pd.Series(df.macd.ewm(span=9, min_periods=9).mean())
    df['close_1d_next'] = df['close'].shift(-1)

    df['close_1d_ago'] = df['close'].shift(1)
    df['close_3d_ago'] = df['close'].shift(3)
    df['close_5d_ago'] = df['close'].shift(5)
    df['close_1w_ago'] = df['close'].shift(7)
    df['close_2w_ago'] = df['close'].shift(14)
    df['close_3w_ago'] = df['close'].shift(21)
    df['close_4w_ago'] = df['close'].shift(28)

    df['adj_close_1d_ago'] = df['adj close'].shift(1)
    df['adj_close_3d_ago'] = df['adj close'].shift(3)
    df['adj_close_5d_ago'] = df['adj close'].shift(5)
    df['adj_close_1w_ago'] = df['adj close'].shift(7)
    df['adj_close_2w_ago'] = df['adj close'].shift(14)
    df['adj_close_3w_ago'] = df['adj close'].shift(21)
    df['adj_close_4w_ago'] = df['adj close'].shift(28)

    df['open_1d_ago'] = df['open'].shift(1)
    df['open_3d_ago'] = df['open'].shift(3)
    df['open_5d_ago'] = df['open'].shift(5)
    df['open_1w_ago'] = df['open'].shift(7)
    df['open_2w_ago'] = df['open'].shift(14)
    df['open_3w_ago'] = df['open'].shift(21)
    df['open_4w_ago'] = df['open'].shift(28)

    df['high_1d_ago'] = df['high'].shift(1)
    df['high_3d_ago'] = df['high'].shift(3)
    df['high_5d_ago'] = df['high'].shift(5)
    df['high_1w_ago'] = df['high'].shift(7)
    df['high_2w_ago'] = df['high'].shift(14)
    df['high_3w_ago'] = df['high'].shift(21)
    df['high_4w_ago'] = df['high'].shift(28)

    df['low_1d_ago'] = df['low'].shift(1)
    df['low_3d_ago'] = df['low'].shift(3)
    df['low_5d_ago'] = df['low'].shift(5)
    df['low_1w_ago'] = df['low'].shift(7)
    df['low_2w_ago'] = df['low'].shift(14)
    df['low_3w_ago'] = df['low'].shift(21)
    df['low_4w_ago'] = df['low'].shift(28)

    df['volume_1d_ago'] = df['volume'].shift(1)
    df['volume_3d_ago'] = df['volume'].shift(3)
    df['volume_5d_ago'] = df['volume'].shift(5)
    df['volume_1w_ago'] = df['volume'].shift(7)
    df['volume_2w_ago'] = df['volume'].shift(14)
    df['volume_3w_ago'] = df['volume'].shift(21)
    df['volume_4w_ago'] = df['volume'].shift(28)

    df['open_3d_avg'] = df['open'].rolling(window=3).mean()
    df['open_5d_avg'] = df['open'].rolling(window=5).mean()
    df['open_7d_avg'] = df['open'].rolling(window=7).mean()
    df['open_10d_avg'] = df['open'].rolling(window=10).mean()
    df['open_15d_avg'] = df['open'].rolling(window=15).mean()
    df['open_30d_avg'] = df['open'].rolling(window=30).mean()

    df['high_3d_avg'] = df['high'].rolling(window=3).mean()
    df['high_5d_avg'] = df['high'].rolling(window=5).mean()
    df['high_7d_avg'] = df['high'].rolling(window=7).mean()
    df['high_10d_avg'] = df['high'].rolling(window=10).mean()
    df['high_15d_avg'] = df['high'].rolling(window=15).mean()
    df['high_30d_avg'] = df['high'].rolling(window=30).mean()

    df['low_3d_avg'] = df['low'].rolling(window=3).mean()
    df['low_5d_avg'] = df['low'].rolling(window=5).mean()
    df['low_7d_avg'] = df['low'].rolling(window=7).mean()
    df['low_10d_avg'] = df['low'].rolling(window=10).mean()
    df['low_15d_avg'] = df['low'].rolling(window=15).mean()
    df['low_30d_avg'] = df['low'].rolling(window=30).mean()

    df['volume_3d_avg'] = df['volume'].rolling(window=3).mean()
    df['volume_5d_avg'] = df['volume'].rolling(window=5).mean()
    df['volume_7d_avg'] = df['volume'].rolling(window=7).mean()
    df['volume_10d_avg'] = df['volume'].rolling(window=10).mean()
    df['volume_15d_avg'] = df['volume'].rolling(window=15).mean()
    df['volume_30d_avg'] = df['volume'].rolling(window=30).mean()

    df['adj_close_3d_avg'] = df['adj close'].rolling(window=3).mean()
    df['adj_close_5d_avg'] = df['adj close'].rolling(window=5).mean()
    df['adj_close_7d_avg'] = df['adj close'].rolling(window=7).mean()
    df['adj_close_10d_avg'] = df['adj close'].rolling(window=10).mean()
    df['adj_close_15d_avg'] = df['adj close'].rolling(window=15).mean()
    df['adj_close_30d_avg'] = df['adj close'].rolling(window=30).mean()
    
    return df
In [87]:
df_all = pd.read_parquet(out_loc+"stock_1d.parquet")
df_all.columns = df_all.columns.str.lower()
In [88]:
### keep stocks in data with min year 2013, max year 2023
stock_min_dt = pd.DataFrame(df_all.groupby('symbol')['date'].min()).reset_index().rename(columns={'date':'min_date'})
stock_max_dt = pd.DataFrame(df_all.groupby('symbol')['date'].max()).reset_index().rename(columns={'date':'max_date'})
stock_cnt_dt = pd.DataFrame(df_all.groupby('symbol')['date'].count()).reset_index().rename(columns={'date':'days_cnt'})

stock_cnt = stock_min_dt.merge(stock_max_dt,on='symbol').merge(stock_cnt_dt,on='symbol')
stock_cnt['min_year'] = stock_cnt['min_date'].dt.year
stock_cnt['max_year'] = stock_cnt['max_date'].dt.year

keep_stocks = stock_cnt[(stock_cnt['min_year']==2013)&(stock_cnt['max_year']==2023)&(stock_cnt['days_cnt']>=2500)]['symbol'].unique().tolist()

stock_cnt.head()
Out[88]:
symbol min_date max_date days_cnt min_year max_year
0 A 2013-01-02 2023-11-08 2733 2013 2023
1 AAL 2013-01-02 2023-11-08 2733 2013 2023
2 AAPL 2013-01-02 2023-11-08 2733 2013 2023
3 ABBV 2013-01-02 2023-11-08 2733 2013 2023
4 ABNB 2020-12-10 2023-11-08 733 2020 2023
In [89]:
df_2023 = df_all[(df_all.date.dt.year==2023) & (df_all.symbol.isin(keep_stocks))]
# volume vs stocks
volume_2023 = pd.DataFrame(df_2023.groupby(['symbol','security','gics sector'])['volume'].sum()).reset_index()
volume_2023 = volume_2023.sort_values(by='volume',ascending=False).reset_index(drop=True)
volume_2023.head()
Out[89]:
symbol security gics sector volume
0 TSLA Tesla, Inc. Consumer Discretionary 3.009291e+10
1 AMD AMD Information Technology 1.342035e+10
2 AMZN Amazon Consumer Discretionary 1.305160e+10
3 AAPL Apple Inc. Information Technology 1.303964e+10
4 F Ford Motor Company Consumer Discretionary 1.278319e+10
In [90]:
# volume vs sectors
sector_2023 = pd.DataFrame(df_2023.groupby(['gics sector'])['volume'].sum()).reset_index()
sector_2023 = sector_2023.sort_values(by='volume',ascending=False).reset_index(drop=True)
sector_2023
Out[90]:
gics sector volume
0 Consumer Discretionary 9.171407e+10
1 Information Technology 8.888840e+10
2 Financials 6.728113e+10
3 Communication Services 5.267892e+10
4 Health Care 3.755560e+10
5 Industrials 3.672492e+10
6 Energy 3.245171e+10
7 Consumer Staples 2.824873e+10
8 Utilities 2.214882e+10
9 Materials 1.432867e+10
10 Real Estate 1.318748e+10
In [91]:
# filter top 5 sectors with highest volume in 2023
sector_list = sector_2023[:5]['gics sector'].tolist()

stock_list = []

num_stocks = 5
# stocks with highest volume in each sector
for sec in sector_list:
    stock_list.append(volume_2023[volume_2023['gics sector']==sec]['symbol'][:num_stocks].tolist())
    
    
stock_list = [item for sublist in stock_list for item in sublist]

len(stock_list)
Out[91]:
25
In [92]:
df_stocks = df_all[df_all['symbol'].isin(stock_list)].reset_index(drop=True)
df_stocks.head()
Out[92]:
date open high low close adj close volume symbol security gics sector gics sub-industry headquarters location date added cik founded
0 2013-01-02 18.003504 18.193193 17.931683 18.099348 18.099348 101550348.0 GOOGL Alphabet Inc. (Class A) Communication Services Interactive Media & Services Mountain View, California 2014-04-03 1652044 1998
1 2013-01-03 18.141392 18.316566 18.036036 18.109859 18.109859 92635272.0 GOOGL Alphabet Inc. (Class A) Communication Services Interactive Media & Services Mountain View, California 2014-04-03 1652044 1998
2 2013-01-04 18.251753 18.555305 18.210211 18.467718 18.467718 110429460.0 GOOGL Alphabet Inc. (Class A) Communication Services Interactive Media & Services Mountain View, California 2014-04-03 1652044 1998
3 2013-01-07 18.404655 18.503002 18.282784 18.387136 18.387136 66161772.0 GOOGL Alphabet Inc. (Class A) Communication Services Interactive Media & Services Mountain View, California 2014-04-03 1652044 1998
4 2013-01-08 18.406906 18.425926 18.128880 18.350851 18.350851 66976956.0 GOOGL Alphabet Inc. (Class A) Communication Services Interactive Media & Services Mountain View, California 2014-04-03 1652044 1998
In [93]:
stock_compare = []

for stock in stock_list:
        stock_data = df_stocks[df_stocks['symbol'] == stock]
        stock_data = preprocess_data(stock_data)
        stock_data = stock_data.dropna().reset_index(drop=True)
        
                # Split the DataFrame into training and testing sets
        train_df_temp = stock_data[stock_data.date.dt.year<2020]
        test_df_temp = stock_data[stock_data.date.dt.year>=2020]

        drop_cols1 = ['date','open','high','low','close','adj close','volume','symbol','security',
         'gics sector','gics sub-industry','headquarters location','date added','cik','founded']

        train_df_temp = train_df_temp.drop(drop_cols1, 1)

        test_df_temp  = test_df_temp.drop(drop_cols1, 1)

        # target column is next day's close price
        y_train_temp = train_df_temp['close_1d_next'].copy()
        X_train_temp = train_df_temp.drop(['close_1d_next'], 1)

        # target column is next day's close price
        y_test_temp  = test_df_temp['close_1d_next'].copy()
        X_test_temp  = test_df_temp.drop(['close_1d_next'], 1)
#        print(stock, len(X_train), len(X_test), len(y_train), len(y_test))
        temp_model, temp_pred, temp_score = train_ridge_regression(X_train_temp,X_test_temp,y_train_temp,y_test_temp)
        
        score_df = pd.DataFrame([temp_score.keys(),temp_score.values()])
        score_df.columns = score_df.iloc[0]
        score_df = score_df[1:].reset_index(drop=True)
        score_df['symbol'] = stock
        
        stock_compare.append(score_df)
        
compare_df = pd.concat(stock_compare).sort_values(by='R2',ascending=False).reset_index(drop =True)
In [94]:
compare_df
Out[94]:
MSE RMSE MAE R2 symbol
0 56.919689 7.544514 5.112978 0.995061 NVDA
1 0.559182 0.747785 0.529467 0.993251 VZ
2 7.770865 2.787627 2.107652 0.992581 AAPL
3 5.039335 2.244846 1.670374 0.992108 GOOG
4 4.98575 2.232879 1.654225 0.992002 GOOGL
5 46.381058 6.810364 4.690443 0.990596 META
6 2.057386 1.434359 1.020137 0.990386 CVS
7 0.147953 0.384647 0.271314 0.990362 F
8 1.27562 1.129434 0.839937 0.98989 GM
9 28.055837 5.296776 3.992424 0.989314 MSFT
10 84.835254 9.210606 6.497099 0.988764 TSLA
11 0.575533 0.758639 0.55086 0.988709 PFE
12 0.581909 0.76283 0.577919 0.988411 BAC
13 0.26222 0.512075 0.377068 0.988041 KEY
14 1.80572 1.343771 0.958788 0.987257 INTC
15 0.991949 0.995966 0.734726 0.987031 WFC
16 11.147927 3.338851 2.469883 0.985621 AMZN
17 0.174755 0.418037 0.281833 0.983153 T
18 10.145635 3.185221 2.328928 0.982675 AMD
19 2.409729 1.55233 1.141577 0.981822 C
20 0.124353 0.352637 0.259468 0.979688 HBAN
21 0.870412 0.932958 0.684029 0.978487 BMY
22 2.087087 1.444675 1.154954 0.96863 CCL
23 4.398395 2.097235 1.52279 0.967914 JNJ
24 4.070663 2.017589 1.528408 0.589411 VTRS